
/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_NULL 0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2
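
/* Typical MUMPS job sequence (an illustrative sketch, not compiled): each phase is
   selected by setting id.job before invoking MUMPS:
     id.job = JOB_INIT;          MUMPS_c(&id);   initialize the instance, set default ICNTL/CNTL
     id.job = JOB_FACTSYMBOLIC;  MUMPS_c(&id);   analysis / symbolic factorization
     id.job = JOB_FACTNUMERIC;   MUMPS_c(&id);   numeric factorization
     id.job = JOB_SOLVE;         MUMPS_c(&id);   forward/backward substitution
     id.job = JOB_END;           MUMPS_c(&id);   free MUMPS internal storage
   The wrappers below drive MUMPS through this sequence. */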

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt, etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT             MPI_INT
#define PETSC_MUMPS_INT_MAX       2147483647
#define PETSC_MUMPS_INT_MIN       (-2147483647 - 1) /* written as an expression since the literal 2147483648 does not fit in an int */

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/column indices or other small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}
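
/* Usage sketch (illustrative, not compiled; <nrow> is a hypothetical PetscInt):
     PetscMUMPSInt n;
     PetscCall(PetscMUMPSIntCast(nrow,&n));
   With 64-bit PetscInt this errors out cleanly instead of silently truncating. */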

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
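
/* Usage sketch (illustrative, not compiled; <icntl> and <flg> are hypothetical locals),
   inside a PetscOptionsBegin/End block:
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering","None",mumps->id.ICNTL(7),&icntl,&flg));
   which reads the option as a PetscInt and range-checks the cast to PetscMUMPSInt. */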

/* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        MUMPS_c(&mumps->id); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is          \
         process-specific, so we only Bcast info[1] (an error code) and leave the others alone (they do   \
         not have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.\
         omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK.\
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
      PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* define MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros so that array indices match the 1-based numbering used in the MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
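
/* Example (illustrative sketch, not compiled): with the macros above,
     mumps->id.ICNTL(7) = 2;
   expands to mumps->id.icntl[6] = 2, so code here can use the same 1-based
   parameter numbers as the MUMPS manual. */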

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
  PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  *ICNTL_pre;
  PetscReal      *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;           /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller guaranteeing that blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is the petsc matrix's comm */
  PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;         /* is this rank the master of omp_comm? */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer, and ja[] holds the column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
    for (i=0; i<nnz; i++)    PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
  PetscFunctionReturn(0);
}
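
/* For reference (a sketch): the 1-based CSR of the dense 2x2 matrix [a b; c d] is
     nrow = 2, ia = {1,3,5}, ja = {1,2,1,2}
   so nnz = ia[nrow]-1 = 4, matching the computation above. */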

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
  PetscCall(MatSetType(B,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B,S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S,B,X));
    } else {
      PetscCall(MatMatSolve(S,B,X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S,B,NULL,X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X,MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
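
/* How the two calls above combine during a solve with a Schur complement (a sketch):
     MatMumpsHandleSchur_Private(F,PETSC_FALSE);  sets ICNTL(26)=1 so the next JOB_SOLVE condenses the rhs into id.redrhs
     ... JOB_SOLVE via PetscMUMPS_c() ...
     MatMumpsHandleSchur_Private(F,PETSC_TRUE);   solves the Schur system on id.redrhs, then runs the ICNTL(26)=2 expansion */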

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij, or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(), and in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is ONLY sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

 */
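/* Worked example (a sketch): with shift = 1 (Fortran-style), the 2x2 AIJ matrix
     [ 2 0 ]
     [ 3 4 ]
   is converted to nnz = 3 and the triples
     r = {1,2,2}, c = {1,1,2}, v = {2,3,4}. */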

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
      }
    }
    for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  M          = A->rmap->N/bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
            PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
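
/* Block expansion example (a sketch): with bs = 2, a single block at block row i and
   block column ajj[k] contributes bs2 = 4 triples covering rows i*bs, i*bs+1 and
   columns 2*ajj[k], 2*ajj[k]+1, enumerated in the same order (row index m varying
   fastest, i.e. column-major within the block) in which the values sit in aa->a. */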

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj,*ajj;
  PetscInt        bs;
  PetscInt64      nz,rnz,i,j,k,m;
  PetscMUMPSInt   *row,*col;
  PetscScalar     *val;
  Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool       isset,hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
  PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai   = aa->i;
  aj   = aa->j;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
    if (bs>1) {
      PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}
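
/* Upper-triangle selection example (a sketch): for a diagonal block (ajj[j] == i)
   with bs = 2, the test k >= m keeps the entries (m,k) = (0,0), (0,1), (1,1), i.e.
   the block's upper triangle, while off-diagonal blocks (ajj[j] > i) are kept whole. */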

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian,isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
  PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i <M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i <M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian,isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
  PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A,&bs));
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    /* cannot determine the exact mumps->nnz yet because of the SBAIJ format; it is set after the fill loop below */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  mumps->nnz = jj;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian,isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A,&isset,&hermitian));
  PetscCheck(!isset || !hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa    = (Mat_SeqAIJ*)(Ad)->data;
  bb    = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0;    /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn,mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
    }
  }
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
  PetscFunctionReturn(0);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to the start of the RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt  ompsize=mumps->omp_comm_size;
  PetscInt           i,m,M,rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&M,NULL));
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
  #if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt  *ranges;
    PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group       petsc_group,omp_group;
    PetscScalar     *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
        PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A,&ranges));
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks,petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when the current nrhs is bigger than any we have seen before */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Set up recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. A single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
    PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
  #endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}
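
/* For reference (a sketch): without the OpenMP optimization, with 2 ranks each owning
   2 rows of A, rank 1 ends up with nloc_rhs = 2, irhs_loc = {3,4} (1-based global row
   indices) and id.rhs_loc pointing at its two local entries of b, which is the layout
   MUMPS expects for a dense distributed RHS (ICNTL(20) = 10). */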

PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar  *rarray = NULL;
  PetscScalar        *array;
  IS                 is_iden,is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b,&rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq,&array));
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b,x));
    PetscCall(VecGetArray(x,&array));
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     Handle the condensation step of the Schur complement (if any).
     We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc));  /* to */
      PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b,&rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq,&array));
    }
  } else PetscCall(VecRestoreArray(x,&array));

  PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A,b,x));
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}
1230 
1231 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1232 {
1233   Mat               Bt = NULL;
1234   PetscBool         denseX,denseB,flg,flgT;
1235   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1236   PetscInt          i,nrhs,M;
1237   PetscScalar       *array;
1238   const PetscScalar *rbray;
1239   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1240   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1241   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1242   IS                is_to,is_from;
1243   PetscInt          k,proc,j,m,myrstart;
1244   const PetscInt    *rstart;
1245   Vec               v_mpi,msol_loc;
1246   VecScatter        scat_sol;
1247   Vec               b_seq;
1248   VecScatter        scat_rhs;
1249   PetscScalar       *aa;
1250   PetscInt          spnr,*ia,*ja;
1251   Mat_MPIAIJ        *b = NULL;
1252 
1253   PetscFunctionBegin;
1254   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
1255   PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1256 
1257   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
1258   if (denseB) {
1259     PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1260     mumps->id.ICNTL(20)= 0; /* dense RHS */
1261   } else { /* sparse B */
1262     PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1263     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
1264     if (flgT) { /* input B is transpose of actural RHS matrix,
1265                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1266       PetscCall(MatTransposeGetMat(B,&Bt));
1267     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1268     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1269   }
1270 
1271   PetscCall(MatGetSize(B,&M,&nrhs));
1272   mumps->id.nrhs = nrhs;
1273   mumps->id.lrhs = M;
1274   mumps->id.rhs  = NULL;
1275 
1276   if (mumps->petsc_size == 1) {
1277     PetscScalar *aa;
1278     PetscInt    spnr,*ia,*ja;
1279     PetscBool   second_solve = PETSC_FALSE;
1280 
1281     PetscCall(MatDenseGetArray(X,&array));
1282     mumps->id.rhs = (MumpsScalar*)array;
1283 
1284     if (denseB) {
1285       /* copy B to X */
1286       PetscCall(MatDenseGetArrayRead(B,&rbray));
1287       PetscCall(PetscArraycpy(array,rbray,M*nrhs));
1288       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1289     } else { /* sparse B */
1290       PetscCall(MatSeqAIJGetArray(Bt,&aa));
1291       PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1292       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1293       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1294       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1295     }
1296     /* handle condensation step of Schur complement (if any) */
1297     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1298       second_solve = PETSC_TRUE;
1299       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1300     }
1301     /* solve phase */
1302     /*-------------*/
1303     mumps->id.job = JOB_SOLVE;
1304     PetscMUMPS_c(mumps);
1305     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1306 
1307     /* handle expansion step of Schur complement (if any) */
1308     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1309     if (!denseB) { /* sparse B */
1310       PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
1311       PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1312       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1313     }
1314     PetscCall(MatDenseRestoreArray(X,&array));
1315     PetscFunctionReturn(0);
1316   }
1317 
1318   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1319   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1320 
1321   /* create msol_loc to hold mumps local solution */
1322   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1323   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1324 
1325   lsol_loc  = mumps->id.lsol_loc;
1326   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1327   PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
1328   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1329   mumps->id.isol_loc = isol_loc;
1330 
1331   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));
1332 
1333   if (denseB) {
1334     if (mumps->ICNTL20 == 10) {
1335       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1336       PetscCall(MatDenseGetArrayRead(B,&rbray));
1337       PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
1338       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1339       PetscCall(MatGetLocalSize(B,&m,NULL));
1340       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
1341     } else {
1342       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1343       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
1344         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1345         0, re-arrange B into desired order, which is a local operation.
1346       */
1347 
1348       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1349       /* wrap dense rhs matrix B into a vector v_mpi */
1350       PetscCall(MatGetLocalSize(B,&m,NULL));
1351       PetscCall(MatDenseGetArray(B,&bray));
1352       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1353       PetscCall(MatDenseRestoreArray(B,&bray));
1354 
1355       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1356       if (!mumps->myid) {
1357         PetscInt *idx;
1358         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1359         PetscCall(PetscMalloc1(nrhs*M,&idx));
1360         PetscCall(MatGetOwnershipRanges(B,&rstart));
1361         k = 0;
1362         for (proc=0; proc<mumps->petsc_size; proc++) {
1363           for (j=0; j<nrhs; j++) {
1364             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1365           }
1366         }
1367 
1368         PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
1369         PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
1370         PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
1371       } else {
1372         PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
1373         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
1374         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
1375       }
1376       PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
1377       PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1378       PetscCall(ISDestroy(&is_to));
1379       PetscCall(ISDestroy(&is_from));
1380       PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1381 
1382       if (!mumps->myid) { /* define rhs on the host */
1383         PetscCall(VecGetArray(b_seq,&bray));
1384         mumps->id.rhs = (MumpsScalar*)bray;
1385         PetscCall(VecRestoreArray(b_seq,&bray));
1386       }
1387     }
1388   } else { /* sparse B */
1389     b = (Mat_MPIAIJ*)Bt->data;
1390 
1391     /* wrap dense X into a vector v_mpi */
1392     PetscCall(MatGetLocalSize(X,&m,NULL));
1393     PetscCall(MatDenseGetArray(X,&bray));
1394     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1395     PetscCall(MatDenseRestoreArray(X,&bray));
1396 
1397     if (!mumps->myid) {
1398       PetscCall(MatSeqAIJGetArray(b->A,&aa));
1399       PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1400       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1401       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1402       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1403     } else {
1404       mumps->id.irhs_ptr    = NULL;
1405       mumps->id.irhs_sparse = NULL;
1406       mumps->id.nz_rhs      = 0;
1407       mumps->id.rhs_sparse  = NULL;
1408     }
1409   }
1410 
1411   /* solve phase */
1412   /*-------------*/
1413   mumps->id.job = JOB_SOLVE;
1414   PetscMUMPS_c(mumps);
1415   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1416 
1417   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1418   PetscCall(MatDenseGetArray(X,&array));
1419   PetscCall(VecPlaceArray(v_mpi,array));
1420 
1421   /* create scatter scat_sol */
1422   PetscCall(MatGetOwnershipRanges(X,&rstart));
1423   /* iidx: index for scatter mumps solution to petsc X */
1424 
1425   PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
1426   PetscCall(PetscMalloc1(nlsol_loc,&idxx));
1427   for (i=0; i<lsol_loc; i++) {
1428     isol_loc[i] -= 1; /* change Fortran 1-based to C 0-based indexing. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th vector */
1429 
1430     for (proc=0; proc<mumps->petsc_size; proc++) {
1431       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1432         myrstart = rstart[proc];
1433         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1434         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1435         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1436         break;
1437       }
1438     }
1439 
1440     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1441   }
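  /* Worked example of the mapping above (assumed sizes): with 2 ranks owning rows [0,2) and [2,4)
     of X and nrhs=2, a MUMPS entry isol_loc[i]=3 falls on proc=1, so myrstart=2, k=1, m=2 and
     iidx = 1 + 2*2 = 5; the copies for the two right-hand sides land at idxx = 5 and 5+m = 7,
     i.e. row 3 of columns 0 and 1 within that rank's contiguous column-major block of v_mpi. */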
1442   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
1443   PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
1444   PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1445   PetscCall(ISDestroy(&is_from));
1446   PetscCall(ISDestroy(&is_to));
1447   PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1448   PetscCall(MatDenseRestoreArray(X,&array));
1449 
1450   /* free spaces */
1451   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1452   mumps->id.isol_loc = isol_loc_save;
1453 
1454   PetscCall(PetscFree2(sol_loc,isol_loc));
1455   PetscCall(PetscFree(idxx));
1456   PetscCall(VecDestroy(&msol_loc));
1457   PetscCall(VecDestroy(&v_mpi));
1458   if (!denseB) {
1459     if (!mumps->myid) {
1460       b = (Mat_MPIAIJ*)Bt->data;
1461       PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
1462       PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1463       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1464     }
1465   } else {
1466     if (mumps->ICNTL20 == 0) {
1467       PetscCall(VecDestroy(&b_seq));
1468       PetscCall(VecScatterDestroy(&scat_rhs));
1469     }
1470   }
1471   PetscCall(VecScatterDestroy(&scat_sol));
1472   PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
1473   PetscFunctionReturn(0);
1474 }
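/* Usage sketch for the routine above (assumed caller code; F is a MUMPS factor of A and B a dense
   matrix with the same row layout as A): */
#if 0
Mat X;
PetscCall(MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&X));
PetscCall(MatMatSolve(F,B,X)); /* dispatches to MatMatSolve_MUMPS() above */
PetscCall(MatDestroy(&X));
#endif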
1475 
1476 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1477 {
1478   PetscBool      flg;
1479   Mat            B;
1480 
1481   PetscFunctionBegin;
1482   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
1483   PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");
1484 
1485   /* Create B=Bt^T that uses Bt's data structure */
1486   PetscCall(MatCreateTranspose(Bt,&B));
1487 
1488   PetscCall(MatMatSolve_MUMPS(A,B,X));
1489   PetscCall(MatDestroy(&B));
1490   PetscFunctionReturn(0);
1491 }
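/* Usage sketch (assumed caller code): factor A with MUMPS, then solve A*X = Bt^T with Bt given
   as an AIJ matrix, as the type check above requires. */
#if 0
Mat           F;
MatFactorInfo finfo;
PetscCall(MatFactorInfoInitialize(&finfo));
PetscCall(MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F));
PetscCall(MatLUFactorSymbolic(F,A,NULL,NULL,&finfo));
PetscCall(MatLUFactorNumeric(F,A,&finfo));
PetscCall(MatMatTransposeSolve(F,Bt,X));
PetscCall(MatDestroy(&F));
#endif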
1492 
1493 #if !defined(PETSC_USE_COMPLEX)
1494 /*
1495   input:
1496    F:        numeric factor
1497   output:
1498    nneg:     total number of negative pivots
1499    nzero:    total number of zero pivots
1500    npos:     (global dimension of F) - nneg - nzero
1501 */
1502 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1503 {
1504   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1505   PetscMPIInt    size;
1506 
1507   PetscFunctionBegin;
1508   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
1509   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1510   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia",mumps->id.ICNTL(13));
1511 
1512   if (nneg) *nneg = mumps->id.INFOG(12);
1513   if (nzero || npos) {
1514     PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1515     if (nzero) *nzero = mumps->id.INFOG(28);
1516     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1517   }
1518   PetscFunctionReturn(0);
1519 }
1520 #endif
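/* Usage sketch for MatGetInertia() above (assumed caller code, A symmetric): the two ICNTL
   settings are the ones the checks above demand; values set before the first MUMPS call are
   cached and applied at JOB_INIT. */
#if 0
Mat           F;
MatFactorInfo finfo;
PetscInt      nneg,nzero,npos;
PetscCall(MatFactorInfoInitialize(&finfo));
PetscCall(MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_CHOLESKY,&F));
PetscCall(MatMumpsSetIcntl(F,13,1)); /* sequential root node, required for correct global inertia */
PetscCall(MatMumpsSetIcntl(F,24,1)); /* null pivot row detection, required for nzero/npos */
PetscCall(MatCholeskyFactorSymbolic(F,A,NULL,&finfo));
PetscCall(MatCholeskyFactorNumeric(F,A,&finfo));
PetscCall(MatGetInertia(F,&nneg,&nzero,&npos));
PetscCall(MatDestroy(&F));
#endif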
1521 
1522 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1523 {
1524   PetscInt       i,nreqs;
1525   PetscMUMPSInt  *irn,*jcn;
1526   PetscMPIInt    count;
1527   PetscInt64     totnnz,remain;
1528   const PetscInt osize=mumps->omp_comm_size;
1529   PetscScalar    *val;
1530 
1531   PetscFunctionBegin;
1532   if (osize > 1) {
1533     if (reuse == MAT_INITIAL_MATRIX) {
1534       /* master first gathers counts of nonzeros to receive */
1535       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
1536       PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));
1537 
1538       /* Then each computes number of send/recvs */
1539       if (mumps->is_omp_master) {
1540         /* Start from 1 since self communication is not done in MPI */
1541         nreqs = 0;
1542         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1543       } else {
1544         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1545       }
1546       PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1547 
1548       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1549          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1550          might be a prime number > 2^31-1, we have to slice the messages. Note that omp_comm_size
1551          is very small, so the current approach should have no extra overhead compared to MPI_Gatherv.
1552        */
1553       nreqs = 0; /* counter for actual send/recvs */
1554       if (mumps->is_omp_master) {
1555         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1556         PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn));
1557         PetscCall(PetscMalloc1(totnnz,&val));
1558 
1559         /* Self communication */
1560         PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz));
1561         PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz));
1562         PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz));
1563 
1564         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1565         PetscCall(PetscFree2(mumps->irn,mumps->jcn));
1566         PetscCall(PetscFree(mumps->val_alloc));
1567         mumps->nnz = totnnz;
1568         mumps->irn = irn;
1569         mumps->jcn = jcn;
1570         mumps->val = mumps->val_alloc = val;
1571 
1572         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1573         jcn += mumps->recvcount[0];
1574         val += mumps->recvcount[0];
1575 
1576         /* Remote communication */
1577         for (i=1; i<osize; i++) {
1578           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1579           remain = mumps->recvcount[i] - count;
1580           while (count>0) {
1581             PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1582             PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1583             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1584             irn    += count;
1585             jcn    += count;
1586             val    += count;
1587             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1588             remain -= count;
1589           }
1590         }
1591       } else {
1592         irn    = mumps->irn;
1593         jcn    = mumps->jcn;
1594         val    = mumps->val;
1595         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1596         remain = mumps->nnz - count;
1597         while (count>0) {
1598           PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1599           PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1600           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1601           irn    += count;
1602           jcn    += count;
1603           val    += count;
1604           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1605           remain -= count;
1606         }
1607       }
1608     } else {
1609       nreqs = 0;
1610       if (mumps->is_omp_master) {
1611         val = mumps->val + mumps->recvcount[0];
1612         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1613           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1614           remain = mumps->recvcount[i] - count;
1615           while (count>0) {
1616             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1617             val    += count;
1618             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1619             remain -= count;
1620           }
1621         }
1622       } else {
1623         val    = mumps->val;
1624         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1625         remain = mumps->nnz - count;
1626         while (count>0) {
1627           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1628           val    += count;
1629           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1630           remain -= count;
1631         }
1632       }
1633     }
1634     PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE));
1635     mumps->tag++; /* It is fine for the send/recvs above to share one MPI tag */
1636   }
1637   PetscFunctionReturn(0);
1638 }
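/* Chunking example for the slicing above (assumed numbers): with nnz = 5e9 and
   PETSC_MPI_INT_MAX = 2^31-1, each of irn/jcn/val is sent in ceil(5e9/(2^31-1)) = 3 pieces,
   i.e. 9 requests in total, fitting the nreqs*3 requests preallocated in MAT_INITIAL_MATRIX mode. */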
1639 
1640 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1641 {
1642   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1643   PetscBool      isMPIAIJ;
1644 
1645   PetscFunctionBegin;
1646   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip the benign -16 "empty matrix" case */
1647     if (mumps->id.INFOG(1) == -6) {
1648       PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1649     }
1650     PetscCall(PetscInfo(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1651     PetscFunctionReturn(0);
1652   }
1653 
1654   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1655   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1656 
1657   /* numerical factorization phase */
1658   /*-------------------------------*/
1659   mumps->id.job = JOB_FACTNUMERIC;
1660   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1661     if (!mumps->myid) {
1662       mumps->id.a = (MumpsScalar*)mumps->val;
1663     }
1664   } else {
1665     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1666   }
1667   PetscMUMPS_c(mumps);
1668   if (mumps->id.INFOG(1) < 0) {
1669     PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1670     if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1671       PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1672       F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1673     } else if (mumps->id.INFOG(1) == -13) {
1674       PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1675       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1676     } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1677       PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1678       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1679     } else {
1680       PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1681       F->factorerrortype = MAT_FACTOR_OTHER;
1682     }
1683   }
1684   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"mumps->id.ICNTL(16)=%d",mumps->id.ICNTL(16));
1685 
1686   F->assembled    = PETSC_TRUE;
1687 
1688   if (F->schur) { /* reset Schur status to unfactored */
1689 #if defined(PETSC_HAVE_CUDA)
1690     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1691 #endif
1692     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1693       mumps->id.ICNTL(19) = 2;
1694       PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1695     }
1696     PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1697   }
1698 
1699   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1700   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1701 
1702   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; /* only master ranks in the omp comm call MUMPS, so others own no part of the distributed solution */
1703   if (mumps->petsc_size > 1) {
1704     PetscInt    lsol_loc;
1705     PetscScalar *sol_loc;
1706 
1707     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1708 
1709     /* distributed solution; Create x_seq=sol_loc for repeated use */
1710     if (mumps->x_seq) {
1711       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1712       PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
1713       PetscCall(VecDestroy(&mumps->x_seq));
1714     }
1715     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1716     PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc));
1717     mumps->id.lsol_loc = lsol_loc;
1718     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1719     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq));
1720   }
1721   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1722   PetscFunctionReturn(0);
1723 }
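/* Usage sketch (assumed caller code): with MatSetErrorIfFailure(A,PETSC_FALSE) the PetscCheck
   above does not abort on failure, and the caller can query the recorded error type instead. */
#if 0
MatFactorError err;
PetscCall(MatLUFactorNumeric(F,A,&finfo));
PetscCall(MatFactorGetError(F,&err));
if (err == MAT_FACTOR_NUMERIC_ZEROPIVOT) { /* e.g. INFOG(1)=-10, numerically singular */ }
#endif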
1724 
1725 /* Sets MUMPS options from the options database */
1726 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1727 {
1728   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1729   PetscMUMPSInt  icntl=0,size,*listvar_schur;
1730   PetscInt       info[80],i,ninfo=80,rbs,cbs;
1731   PetscBool      flg=PETSC_FALSE,schur=(PetscBool)(mumps->id.ICNTL(26) == -1);
1732   MumpsScalar    *arr;
1733 
1734   PetscFunctionBegin;
1735   PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"MUMPS Options","Mat");
1736   if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */
1737     PetscInt nthreads = 0;
1738     PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
1739     PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
1740 
1741     mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1742     PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size));
1743     PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */
1744 
1745     PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads","Convert MPI processes into OpenMP threads","None",&mumps->use_petsc_omp_support));
1746     if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1747     /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */
1748     PetscCall(PetscOptionsGetInt(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL));
1749     if (mumps->use_petsc_omp_support) {
1750       PetscCheck(PetscDefined(HAVE_OPENMP_SUPPORT),PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)F)->prefix?((PetscObject)F)->prefix:"");
1751       PetscCheck(!schur,PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature",((PetscObject)F)->prefix?((PetscObject)F)->prefix:"");
1752 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1753       PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl));
1754       PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master));
1755 #endif
1756     } else {
1757       mumps->omp_comm      = PETSC_COMM_SELF;
1758       mumps->mumps_comm    = mumps->petsc_comm;
1759       mumps->is_omp_master = PETSC_TRUE;
1760     }
1761     PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size));
1762     mumps->reqs = NULL;
1763     mumps->tag  = 0;
1764 
1765     if (mumps->mumps_comm != MPI_COMM_NULL) {
1766       if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) {
1767         /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1768         MPI_Comm comm;
1769         PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm,&comm));
1770         mumps->mumps_comm = comm;
1771       } else PetscCall(PetscCommGetComm(mumps->petsc_comm,&mumps->mumps_comm));
1772     }
1773 
1774     mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1775     mumps->id.job = JOB_INIT;
1776     mumps->id.par = 1;  /* host participates in factorization and solve */
1777     mumps->id.sym = mumps->sym;
1778 
1779     size = mumps->id.size_schur;
1780     arr = mumps->id.schur;
1781     listvar_schur = mumps->id.listvar_schur;
1782     PetscMUMPS_c(mumps);
1783     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1784     /* restore cached ICNTL and CNTL values */
1785     for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1+2*icntl]) = mumps->ICNTL_pre[2+2*icntl];
1786     for (icntl = 0; icntl < nCNTL_pre; ++icntl)  mumps->id.CNTL((PetscInt)mumps->CNTL_pre[1+2*icntl]) = mumps->CNTL_pre[2+2*icntl];
1787     PetscCall(PetscFree(mumps->ICNTL_pre));
1788     PetscCall(PetscFree(mumps->CNTL_pre));
1789 
1790     if (schur) {
1791       mumps->id.size_schur = size;
1792       mumps->id.schur_lld  = size;
1793       mumps->id.schur = arr;
1794       mumps->id.listvar_schur = listvar_schur;
1795       if (mumps->petsc_size > 1) {
1796         PetscBool gs; /* gs is false if any rank other than root has non-empty IS */
1797 
1798         mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
1799         gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
1800         PetscCallMPI(MPI_Allreduce(MPI_IN_PLACE,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm));
1801         PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
1802       } else {
1803         if (F->factortype == MAT_FACTOR_LU) {
1804           mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
1805         } else {
1806           mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
1807         }
1808       }
1809       mumps->id.ICNTL(26) = -1;
1810     }
1811 
1812     /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1813        For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1814      */
1815     PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm));
1816     PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1817 
1818     mumps->scat_rhs = NULL;
1819     mumps->scat_sol = NULL;
1820 
1821     /* set PETSc-MUMPS default options - override MUMPS default */
1822     mumps->id.ICNTL(3) = 0;
1823     mumps->id.ICNTL(4) = 0;
1824     if (mumps->petsc_size == 1) {
1825       mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1826       mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1827     } else {
1828       mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1829       mumps->id.ICNTL(21) = 1;   /* distributed solution */
1830     }
1831   }
1832   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg));
1833   if (flg) mumps->id.ICNTL(1) = icntl;
1834   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg));
1835   if (flg) mumps->id.ICNTL(2) = icntl;
1836   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg));
1837   if (flg) mumps->id.ICNTL(3) = icntl;
1838 
1839   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg));
1840   if (flg) mumps->id.ICNTL(4) = icntl;
1841   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
1842 
1843   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg));
1844   if (flg) mumps->id.ICNTL(6) = icntl;
1845 
1846   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg));
1847   if (flg) {
1848     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1849     mumps->id.ICNTL(7) = icntl;
1850   }
1851 
1852   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL));
1853   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1854   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL));
1855   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL));
1856   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL));
1857   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL));
1858   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL));
1859   PetscCall(MatGetBlockSizes(A,&rbs,&cbs));
1860   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1861   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg));
1862   if (flg) {
1863     PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled");
1864     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The negation of -mat_mumps_icntl_15 must be a multiple of the row and column block sizes");
1865   }
1866   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL));
1867   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
1868     PetscCall(MatDestroy(&F->schur));
1869     PetscCall(MatMumpsResetSchur_Private(mumps));
1870   }
1871 
1872   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1873      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1874      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1875      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1876      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1877      In short, we could not use distributed RHS with MPICH until v4.0b1.
1878    */
1879 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1880   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1881 #else
1882   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1883 #endif
1884   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg));
1885   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1886 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1887   PetscCheck(!flg || mumps->ICNTL20 != 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1888 #endif
1889   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1890 
1891   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL));
1892   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL));
1893   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
1894   if (mumps->id.ICNTL(24)) {
1895     mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
1896   }
1897 
1898   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL));
1899   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL));
1900   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL));
1901   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL));
1902   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL));
1903   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1904   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL));
1905   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1906   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL));
1907   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL));
1908   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL));
1909   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL));
1910 
1911   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL));
1912   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL));
1913   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL));
1914   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL));
1915   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL));
1916   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL));
1917 
1918   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1919 
1920   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL));
1921   if (ninfo) {
1922     PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must be <= 80",ninfo);
1923     PetscCall(PetscMalloc1(ninfo,&mumps->info));
1924     mumps->ninfo = ninfo;
1925     for (i=0; i<ninfo; i++) {
1926       PetscCheck(info[i] >= 1 && info[i] <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must be between 1 and 80",info[i]);
1927       mumps->info[i] = info[i];
1928     }
1929   }
1930   PetscOptionsEnd();
1931   PetscFunctionReturn(0);
1932 }
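/* Illustrative command line exercising the options parsed above (application name assumed):
     ./app -pc_type lu -pc_factor_mat_solver_type mumps \
           -mat_mumps_icntl_14 30 -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12
   The same controls can be set programmatically with MatMumpsSetIcntl()/MatMumpsSetCntl(). */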
1933 
1934 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1935 {
1936   PetscFunctionBegin;
1937   if (mumps->id.INFOG(1) < 0) {
1938     PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1939     if (mumps->id.INFOG(1) == -6) {
1940       PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1941       F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1942     } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1943       PetscCall(PetscInfo(F,"problem with workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1944       F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1945     } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) {
1946       PetscCall(PetscInfo(F,"Empty matrix\n"));
1947     } else {
1948       PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1949       F->factorerrortype = MAT_FACTOR_OTHER;
1950     }
1951   }
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1956 {
1957   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1958   Vec            b;
1959   const PetscInt M = A->rmap->N;
1960 
1961   PetscFunctionBegin;
1962   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1963     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1964     PetscFunctionReturn(0);
1965   }
1966 
1967   /* Set MUMPS options from the options database */
1968   PetscCall(MatSetFromOptions_MUMPS(F,A));
1969 
1970   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1971   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
1972 
1973   /* analysis phase */
1974   /*----------------*/
1975   mumps->id.job = JOB_FACTSYMBOLIC;
1976   mumps->id.n   = M;
1977   switch (mumps->id.ICNTL(18)) {
1978   case 0:  /* centralized assembled matrix input */
1979     if (!mumps->myid) {
1980       mumps->id.nnz = mumps->nnz;
1981       mumps->id.irn = mumps->irn;
1982       mumps->id.jcn = mumps->jcn;
1983       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1984       if (r) {
1985         mumps->id.ICNTL(7) = 1;
1986         if (!mumps->myid) {
1987           const PetscInt *idx;
1988           PetscInt       i;
1989 
1990           PetscCall(PetscMalloc1(M,&mumps->id.perm_in));
1991           PetscCall(ISGetIndices(r,&idx));
1992           for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
1993           PetscCall(ISRestoreIndices(r,&idx));
1994         }
1995       }
1996     }
1997     break;
1998   case 3:  /* distributed assembled matrix input (size>1) */
1999     mumps->id.nnz_loc = mumps->nnz;
2000     mumps->id.irn_loc = mumps->irn;
2001     mumps->id.jcn_loc = mumps->jcn;
2002     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
2003     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2004       PetscCall(MatCreateVecs(A,NULL,&b));
2005       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2006       PetscCall(VecDestroy(&b));
2007     }
2008     break;
2009   }
2010   PetscMUMPS_c(mumps);
2011   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2012 
2013   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2014   F->ops->solve           = MatSolve_MUMPS;
2015   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2016   F->ops->matsolve        = MatMatSolve_MUMPS;
2017   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2018 
2019   mumps->matstruc = SAME_NONZERO_PATTERN;
2020   PetscFunctionReturn(0);
2021 }
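/* End-to-end usage sketch (assumed caller code): the symbolic/numeric split above is normally
   driven through KSP/PC rather than called directly. */
#if 0
KSP ksp;
PC  pc;
PetscCall(KSPCreate(PETSC_COMM_WORLD,&ksp));
PetscCall(KSPSetOperators(ksp,A,A));
PetscCall(KSPSetType(ksp,KSPPREONLY)); /* the factorization is the whole solver */
PetscCall(KSPGetPC(ksp,&pc));
PetscCall(PCSetType(pc,PCLU));
PetscCall(PCFactorSetMatSolverType(pc,MATSOLVERMUMPS));
PetscCall(KSPSetFromOptions(ksp));
PetscCall(KSPSolve(ksp,b,x));
PetscCall(KSPDestroy(&ksp));
#endif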
2022 
2023 /* Note the Petsc r and c permutations are ignored */
2024 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2025 {
2026   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2027   Vec            b;
2028   const PetscInt M = A->rmap->N;
2029 
2030   PetscFunctionBegin;
2031   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2032     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2033     PetscFunctionReturn(0);
2034   }
2035 
2036   /* Set MUMPS options from the options database */
2037   PetscCall(MatSetFromOptions_MUMPS(F,A));
2038 
2039   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2040   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2041 
2042   /* analysis phase */
2043   /*----------------*/
2044   mumps->id.job = JOB_FACTSYMBOLIC;
2045   mumps->id.n   = M;
2046   switch (mumps->id.ICNTL(18)) {
2047   case 0:  /* centralized assembled matrix input */
2048     if (!mumps->myid) {
2049       mumps->id.nnz = mumps->nnz;
2050       mumps->id.irn = mumps->irn;
2051       mumps->id.jcn = mumps->jcn;
2052       if (mumps->id.ICNTL(6)>1) {
2053         mumps->id.a = (MumpsScalar*)mumps->val;
2054       }
2055     }
2056     break;
2057   case 3:  /* distributed assembled matrix input (size>1) */
2058     mumps->id.nnz_loc = mumps->nnz;
2059     mumps->id.irn_loc = mumps->irn;
2060     mumps->id.jcn_loc = mumps->jcn;
2061     if (mumps->id.ICNTL(6)>1) {
2062       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2063     }
2064     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2065       PetscCall(MatCreateVecs(A,NULL,&b));
2066       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2067       PetscCall(VecDestroy(&b));
2068     }
2069     break;
2070   }
2071   PetscMUMPS_c(mumps);
2072   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2073 
2074   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2075   F->ops->solve           = MatSolve_MUMPS;
2076   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2077 
2078   mumps->matstruc = SAME_NONZERO_PATTERN;
2079   PetscFunctionReturn(0);
2080 }
2081 
2082 /* Note the Petsc r permutation and factor info are ignored */
2083 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2084 {
2085   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2086   Vec            b;
2087   const PetscInt M = A->rmap->N;
2088 
2089   PetscFunctionBegin;
2090   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2091     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2092     PetscFunctionReturn(0);
2093   }
2094 
2095   /* Set MUMPS options from the options database */
2096   PetscCall(MatSetFromOptions_MUMPS(F,A));
2097 
2098   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2099   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2100 
2101   /* analysis phase */
2102   /*----------------*/
2103   mumps->id.job = JOB_FACTSYMBOLIC;
2104   mumps->id.n   = M;
2105   switch (mumps->id.ICNTL(18)) {
2106   case 0:  /* centralized assembled matrix input */
2107     if (!mumps->myid) {
2108       mumps->id.nnz = mumps->nnz;
2109       mumps->id.irn = mumps->irn;
2110       mumps->id.jcn = mumps->jcn;
2111       if (mumps->id.ICNTL(6)>1) {
2112         mumps->id.a = (MumpsScalar*)mumps->val;
2113       }
2114     }
2115     break;
2116   case 3:  /* distributed assembled matrix input (size>1) */
2117     mumps->id.nnz_loc = mumps->nnz;
2118     mumps->id.irn_loc = mumps->irn;
2119     mumps->id.jcn_loc = mumps->jcn;
2120     if (mumps->id.ICNTL(6)>1) {
2121       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2122     }
2123     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2124       PetscCall(MatCreateVecs(A,NULL,&b));
2125       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2126       PetscCall(VecDestroy(&b));
2127     }
2128     break;
2129   }
2130   PetscMUMPS_c(mumps);
2131   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2132 
2133   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2134   F->ops->solve                 = MatSolve_MUMPS;
2135   F->ops->solvetranspose        = MatSolve_MUMPS;
2136   F->ops->matsolve              = MatMatSolve_MUMPS;
2137   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2138 #if defined(PETSC_USE_COMPLEX)
2139   F->ops->getinertia = NULL;
2140 #else
2141   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2142 #endif
2143 
2144   mumps->matstruc = SAME_NONZERO_PATTERN;
2145   PetscFunctionReturn(0);
2146 }
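/* Usage sketch (assumed caller code): same pattern as the LU case above but with PCCHOLESKY;
   on real builds MatGetInertia() is then available on the factor, as registered above. */
#if 0
PetscCall(PCSetType(pc,PCCHOLESKY));
PetscCall(PCFactorSetMatSolverType(pc,MATSOLVERMUMPS));
#endif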
2147 
2148 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2149 {
2150   PetscBool         iascii;
2151   PetscViewerFormat format;
2152   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2153 
2154   PetscFunctionBegin;
2155   /* check if matrix is mumps type */
2156   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2157 
2158   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
2159   if (iascii) {
2160     PetscCall(PetscViewerGetFormat(viewer,&format));
2161     if (format == PETSC_VIEWER_ASCII_INFO) {
2162       PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n"));
2163       PetscCall(PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym));
2164       PetscCall(PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par));
2165       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1)));
2166       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2)));
2167       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3)));
2168       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4)));
2169       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5)));
2170       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6)));
2171       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7)));
2172       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8)));
2173       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10)));
2174       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11)));
2175       if (mumps->id.ICNTL(11)>0) {
2176         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4)));
2177         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5)));
2178         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6)));
2179         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8)));
2180         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9)));
2181         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11)));
2182       }
2183       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12)));
2184       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13)));
2185       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14)));
2186       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (compression of the input matrix):            %d\n",mumps->id.ICNTL(15)));
2187       /* ICNTL(16-17) not used */
2188       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18)));
2189       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19)));
2190       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20)));
2191       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21)));
2192       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22)));
2193       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23)));
2194 
2195       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24)));
2196       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25)));
2197       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26)));
2198       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27)));
2199       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28)));
2200       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29)));
2201 
2202       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30)));
2203       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors is discarded in the solve phase):    %d\n",mumps->id.ICNTL(31)));
2204       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33)));
2205       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35)));
2206       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36)));
2207       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38)));
2208 
2209       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1)));
2210       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2)));
2211       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3)));
2212       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4)));
2213       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5)));
2214       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7)));
2215 
2216       /* information local to each processor */
2217       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n"));
2218       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2219       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1)));
2220       PetscCall(PetscViewerFlush(viewer));
2221       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n"));
2222       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2)));
2223       PetscCall(PetscViewerFlush(viewer));
2224       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n"));
2225       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3)));
2226       PetscCall(PetscViewerFlush(viewer));
2227 
2228       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n"));
2229       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d\n",mumps->myid,mumps->id.INFO(15)));
2230       PetscCall(PetscViewerFlush(viewer));
2231 
2232       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n"));
2233       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16)));
2234       PetscCall(PetscViewerFlush(viewer));
2235 
2236       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n"));
2237       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23)));
2238       PetscCall(PetscViewerFlush(viewer));
2239 
2240       if (mumps->ninfo && mumps->ninfo <= 80) {
2241         PetscInt i;
2242         for (i=0; i<mumps->ninfo; i++) {
2243           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]));
2244           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i])));
2245           PetscCall(PetscViewerFlush(viewer));
2246         }
2247       }
2248       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2249 
2250       if (!mumps->myid) { /* information from the host */
2251         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1)));
2252         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2)));
2253         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3)));
2254         PetscCall(PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34)));
2255 
2256         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3)));
2257         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4)));
2258         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5)));
2259         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6)));
2260         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7)));
2261         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8)));
2262         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9)));
2263         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10)));
2264         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11)));
2265         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12)));
2266         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13)));
2267         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14)));
2268         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15)));
2269         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16)));
2270         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17)));
2271         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18)));
2272         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19)));
2273         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20)));
2274         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21)));
2275         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22)));
2276         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23)));
2277         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24)));
2278         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25)));
2279         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28)));
2280         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29)));
2281         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31)));
2282         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32)));
2283         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33)));
2284         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34)));
2285         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35)));
2286         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36)));
2287         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37)));
2288         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38)));
2289         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39)));
2290       }
2291     }
2292   }
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2297 {
2298   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2299 
2300   PetscFunctionBegin;
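  /* MUMPS only provides the (estimated) number of factor entries via INFOG(20); the other MatInfo fields are not available from MUMPS and are zeroed */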
2301   info->block_size        = 1.0;
2302   info->nz_allocated      = mumps->id.INFOG(20);
2303   info->nz_used           = mumps->id.INFOG(20);
2304   info->nz_unneeded       = 0.0;
2305   info->assemblies        = 0.0;
2306   info->mallocs           = 0.0;
2307   info->memory            = 0.0;
2308   info->fill_ratio_given  = 0;
2309   info->fill_ratio_needed = 0;
2310   info->factor_mallocs    = 0;
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 /* -------------------------------------------------------------------------------------------*/
2315 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2316 {
2317   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2318   const PetscScalar *arr;
2319   const PetscInt    *idxs;
2320   PetscInt          size,i;
2321 
2322   PetscFunctionBegin;
2323   PetscCall(ISGetLocalSize(is,&size));
2324   /* Schur complement matrix */
2325   PetscCall(MatDestroy(&F->schur));
2326   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur));
2327   PetscCall(MatDenseGetArrayRead(F->schur,&arr));
2328   mumps->id.schur      = (MumpsScalar*)arr;
2329   mumps->id.size_schur = size;
2330   mumps->id.schur_lld  = size;
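  /* MUMPS writes the Schur complement into this dense buffer (leading dimension schur_lld) during factorization */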
2331   PetscCall(MatDenseRestoreArrayRead(F->schur,&arr));
2332   if (mumps->sym == 1) {
2333     PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE));
2334   }
2335 
2336   /* MUMPS expects Fortran style indices */
2337   PetscCall(PetscFree(mumps->id.listvar_schur));
2338   PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur));
2339   PetscCall(ISGetIndices(is,&idxs));
2340   for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i])));
2341   PetscCall(ISRestoreIndices(is,&idxs));
2342   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2343   mumps->id.ICNTL(26) = -1;
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 /* -------------------------------------------------------------------------------------------*/
2348 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2349 {
2350   Mat            St;
2351   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2352   PetscScalar    *array;
2353 #if defined(PETSC_USE_COMPLEX)
2354   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2355 #endif
2356 
2357   PetscFunctionBegin;
2358   PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2359   PetscCall(MatCreate(PETSC_COMM_SELF,&St));
2360   PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur));
2361   PetscCall(MatSetType(St,MATDENSE));
2362   PetscCall(MatSetUp(St));
2363   PetscCall(MatDenseGetArray(St,&array));
2364   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2365     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2366       PetscInt i,j,N=mumps->id.size_schur;
2367       for (i=0;i<N;i++) {
2368         for (j=0;j<N;j++) {
2369 #if !defined(PETSC_USE_COMPLEX)
2370           PetscScalar val = mumps->id.schur[i*N+j];
2371 #else
2372           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2373 #endif
2374           array[j*N+i] = val;
2375         }
2376       }
2377     } else { /* stored by columns */
2378       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2379     }
2380   } else { /* either full or lower-triangular (not packed) */
2381     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2382       PetscInt i,j,N=mumps->id.size_schur;
2383       for (i=0;i<N;i++) {
2384         for (j=i;j<N;j++) {
2385 #if !defined(PETSC_USE_COMPLEX)
2386           PetscScalar val = mumps->id.schur[i*N+j];
2387 #else
2388           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2389 #endif
2390           array[i*N+j] = val;
2391           array[j*N+i] = val;
2392         }
2393       }
2394     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2395       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2396     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2397       PetscInt i,j,N=mumps->id.size_schur;
2398       for (i=0;i<N;i++) {
2399         for (j=0;j<i+1;j++) {
2400 #if !defined(PETSC_USE_COMPLEX)
2401           PetscScalar val = mumps->id.schur[i*N+j];
2402 #else
2403           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2404 #endif
2405           array[i*N+j] = val;
2406           array[j*N+i] = val;
2407         }
2408       }
2409     }
2410   }
2411   PetscCall(MatDenseRestoreArray(St,&array));
2412   *S   = St;
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 /* -------------------------------------------------------------------------------------------*/
2417 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2418 {
2419   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2420 
2421   PetscFunctionBegin;
2422   if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
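    /* ICNTL_pre layout: [n, icntl_1, ival_1, ..., icntl_n, ival_n], i.e. ICNTL_pre[0] holds the pair count n, followed by 2*n entries */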
2423     PetscInt i,nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
2424     for (i = 0; i < nICNTL_pre; ++i) if (mumps->ICNTL_pre[1+2*i] == icntl) break; /* is this ICNTL already cached? */
2425     if (i == nICNTL_pre) { /* not already cached */
2426       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt)*(2*nICNTL_pre + 3),&mumps->ICNTL_pre));
2427       else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt)*3,&mumps->ICNTL_pre));
2428       mumps->ICNTL_pre[0]++;
2429     }
2430     mumps->ICNTL_pre[1+2*i] = icntl;
2431     PetscCall(PetscMUMPSIntCast(ival,mumps->ICNTL_pre+2+2*i));
2432   } else PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl)));
2433   PetscFunctionReturn(0);
2434 }
2435 
2436 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2437 {
2438   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2439 
2440   PetscFunctionBegin;
2441   *ival = mumps->id.ICNTL(icntl);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 /*@
2446   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2447 
2448    Logically Collective on Mat
2449 
2450    Input Parameters:
2451 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2452 .  icntl - index of MUMPS parameter array ICNTL()
2453 -  ival - value of MUMPS ICNTL(icntl)
2454 
2455   Options Database:
2456 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
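
  Example Usage (a minimal sketch; assumes F was obtained with MatGetFactor() and the MATSOLVERMUMPS solver type):
$     PetscCall(MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F));
$     PetscCall(MatMumpsSetIcntl(F,14,30)); /* ICNTL(14): increase the estimated working space by 30 percent */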
2457 
2458    Level: beginner
2459 
2460    References:
2461 .  * - MUMPS Users' Guide
2462 
2463 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2464 @*/
2465 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2466 {
2467   PetscFunctionBegin;
2468   PetscValidType(F,1);
2469   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2470   PetscValidLogicalCollectiveInt(F,icntl,2);
2471   PetscValidLogicalCollectiveInt(F,ival,3);
2472   PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl);
2473   PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 /*@
2478   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2479 
2480    Logically Collective on Mat
2481 
2482    Input Parameters:
2483 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2484 -  icntl - index of MUMPS parameter array ICNTL()
2485 
2486   Output Parameter:
2487 .  ival - value of MUMPS ICNTL(icntl)
2488 
2489    Level: beginner
2490 
2491    References:
2492 .  * - MUMPS Users' Guide
2493 
2494 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2495 @*/
2496 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2497 {
2498   PetscFunctionBegin;
2499   PetscValidType(F,1);
2500   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2501   PetscValidLogicalCollectiveInt(F,icntl,2);
2502   PetscValidIntPointer(ival,3);
2503   PetscCheck(icntl >= 1 && icntl <= 38,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported ICNTL value %" PetscInt_FMT,icntl);
2504   PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 /* -------------------------------------------------------------------------------------------*/
2509 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2510 {
2511   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2512 
2513   PetscFunctionBegin;
2514   if (mumps->id.job == JOB_NULL) {
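    /* cache (icntl,val) pairs in CNTL_pre, with the same [count, pairs...] layout as ICNTL_pre, until MUMPS is initialized */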
2515     PetscInt i,nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
2516     for (i = 0; i < nCNTL_pre; ++i) if (mumps->CNTL_pre[1+2*i] == icntl) break;
2517     if (i == nCNTL_pre) {
2518       if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal)*(2*nCNTL_pre + 3),&mumps->CNTL_pre));
2519       else PetscCall(PetscCalloc(sizeof(PetscReal)*3,&mumps->CNTL_pre));
2520       mumps->CNTL_pre[0]++;
2521     }
2522     mumps->CNTL_pre[1+2*i] = icntl;
2523     mumps->CNTL_pre[2+2*i] = val;
2524   } else mumps->id.CNTL(icntl) = val;
2525   PetscFunctionReturn(0);
2526 }
2527 
2528 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2529 {
2530   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2531 
2532   PetscFunctionBegin;
2533   *val = mumps->id.CNTL(icntl);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 /*@
2538   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2539 
2540    Logically Collective on Mat
2541 
2542    Input Parameters:
2543 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2544 .  icntl - index of MUMPS parameter array CNTL()
2545 -  val - value of MUMPS CNTL(icntl)
2546 
2547   Options Database:
2548 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
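
  Example Usage (a minimal sketch; assumes F was obtained with MatGetFactor() and the MATSOLVERMUMPS solver type):
$     PetscCall(MatMumpsSetCntl(F,1,0.01)); /* CNTL(1): relative pivoting threshold */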
2549 
2550    Level: beginner
2551 
2552    References:
2553 .  * - MUMPS Users' Guide
2554 
2555 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2556 @*/
2557 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2558 {
2559   PetscFunctionBegin;
2560   PetscValidType(F,1);
2561   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2562   PetscValidLogicalCollectiveInt(F,icntl,2);
2563   PetscValidLogicalCollectiveReal(F,val,3);
2564   PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl);
2565   PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 /*@
2570   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2571 
2572    Logically Collective on Mat
2573 
2574    Input Parameters:
2575 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2576 -  icntl - index of MUMPS parameter array CNTL()
2577 
2578   Output Parameter:
2579 .  val - value of MUMPS CNTL(icntl)
2580 
2581    Level: beginner
2582 
2583    References:
2584 .  * - MUMPS Users' Guide
2585 
2586 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2587 @*/
2588 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2589 {
2590   PetscFunctionBegin;
2591   PetscValidType(F,1);
2592   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2593   PetscValidLogicalCollectiveInt(F,icntl,2);
2594   PetscValidRealPointer(val,3);
2595   PetscCheck(icntl >= 1 && icntl <= 7,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONG,"Unsupported CNTL value %" PetscInt_FMT,icntl);
2596   PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2601 {
2602   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2603 
2604   PetscFunctionBegin;
2605   *info = mumps->id.INFO(icntl);
2606   PetscFunctionReturn(0);
2607 }
2608 
2609 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2610 {
2611   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2612 
2613   PetscFunctionBegin;
2614   *infog = mumps->id.INFOG(icntl);
2615   PetscFunctionReturn(0);
2616 }
2617 
2618 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2619 {
2620   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2621 
2622   PetscFunctionBegin;
2623   *rinfo = mumps->id.RINFO(icntl);
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2628 {
2629   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2630 
2631   PetscFunctionBegin;
2632   *rinfog = mumps->id.RINFOG(icntl);
2633   PetscFunctionReturn(0);
2634 }
2635 
2636 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2637 {
2638   Mat            Bt = NULL,Btseq = NULL;
2639   PetscBool      flg;
2640   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2641   PetscScalar    *aa;
2642   PetscInt       spnr,*ia,*ja,M,nrhs;
2643 
2644   PetscFunctionBegin;
2645   PetscValidPointer(spRHS,2);
2646   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg));
2647   if (flg) {
2648     PetscCall(MatTransposeGetMat(spRHS,&Bt));
2649   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2650 
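  /* ICNTL(30) = 1: have MUMPS compute the user-specified set of entries of inv(A) */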
2651   PetscCall(MatMumpsSetIcntl(F,30,1));
2652 
2653   if (mumps->petsc_size > 1) {
2654     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2655     Btseq = b->A;
2656   } else {
2657     Btseq = Bt;
2658   }
2659 
2660   PetscCall(MatGetSize(spRHS,&M,&nrhs));
2661   mumps->id.nrhs = nrhs;
2662   mumps->id.lrhs = M;
2663   mumps->id.rhs  = NULL;
2664 
2665   if (!mumps->myid) {
2666     PetscCall(MatSeqAIJGetArray(Btseq,&aa));
2667     PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2668     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2669     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
2670     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2671   } else {
2672     mumps->id.irhs_ptr    = NULL;
2673     mumps->id.irhs_sparse = NULL;
2674     mumps->id.nz_rhs      = 0;
2675     mumps->id.rhs_sparse  = NULL;
2676   }
2677   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2678   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2679 
2680   /* solve phase */
2681   /*-------------*/
2682   mumps->id.job = JOB_SOLVE;
2683   PetscMUMPS_c(mumps);
2684   if (mumps->id.INFOG(1) < 0)
2685     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2686 
2687   if (!mumps->myid) {
2688     PetscCall(MatSeqAIJRestoreArray(Btseq,&aa));
2689     PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2690     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2691   }
2692   PetscFunctionReturn(0);
2693 }
2694 
2695 /*@
2696   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2697 
2698    Logically Collective on Mat
2699 
2700    Input Parameters:
2701 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2702 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the specified indices, stored on process 0
2703 
2704   Output Parameter:
2705 . spRHS - requested entries of inverse of A
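
  Example Usage (a minimal sketch; Bt is a hypothetical SeqAIJ matrix whose nonzero pattern marks the requested entries of inv(A)):
$     PetscCall(MatCreateTranspose(Bt,&spRHS)); /* wrap Bt so that spRHS has type MATTRANSPOSEMAT */
$     PetscCall(MatMumpsGetInverse(F,spRHS));   /* on return, the requested entries are stored in Bt */
$     PetscCall(MatDestroy(&spRHS));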
2706 
2707    Level: beginner
2708 
2709    References:
2710 .  * - MUMPS Users' Guide
2711 
2712 .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2713 @*/
2714 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2715 {
2716   PetscFunctionBegin;
2717   PetscValidType(F,1);
2718   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2719   PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2724 {
2725   Mat            spRHS;
2726 
2727   PetscFunctionBegin;
2728   PetscCall(MatCreateTranspose(spRHST,&spRHS));
2729   PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS));
2730   PetscCall(MatDestroy(&spRHS));
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 /*@
2735   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2736 
2737    Logically Collective on Mat
2738 
2739    Input Parameters:
2740 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2741 -  spRHST - sequential sparse matrix in MATAIJ format holding the specified indices of A^T, stored on process 0
2742 
2743   Output Parameter:
2744 . spRHST - requested entries of inverse of A^T
2745 
2746    Level: beginner
2747 
2748    References:
2749 .  * - MUMPS Users' Guide
2750 
2751 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2752 @*/
2753 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2754 {
2755   PetscBool      flg;
2756 
2757   PetscFunctionBegin;
2758   PetscValidType(F,1);
2759   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2760   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
2761   PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2762 
2763   PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 /*@
2768   MatMumpsGetInfo - Get MUMPS parameter INFO()
2769 
2770    Logically Collective on Mat
2771 
2772    Input Parameters:
2773 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2774 -  icntl - index of MUMPS parameter array INFO()
2775 
2776   Output Parameter:
2777 .  ival - value of MUMPS INFO(icntl)
2778 
2779    Level: beginner
2780 
2781    References:
2782 .  * - MUMPS Users' Guide
2783 
2784 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2785 @*/
2786 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2787 {
2788   PetscFunctionBegin;
2789   PetscValidType(F,1);
2790   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2791   PetscValidIntPointer(ival,3);
2792   PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2793   PetscFunctionReturn(0);
2794 }
2795 
2796 /*@
2797   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2798 
2799    Logically Collective on Mat
2800 
2801    Input Parameters:
2802 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2803 -  icntl - index of MUMPS parameter array INFOG()
2804 
2805   Output Parameter:
2806 .  ival - value of MUMPS INFOG(icntl)
2807 
2808    Level: beginner
2809 
2810    References:
2811 .  * - MUMPS Users' Guide
2812 
2813 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2814 @*/
2815 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2816 {
2817   PetscFunctionBegin;
2818   PetscValidType(F,1);
2819   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2820   PetscValidIntPointer(ival,3);
2821   PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2822   PetscFunctionReturn(0);
2823 }
2824 
2825 /*@
2826   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2827 
2828    Logically Collective on Mat
2829 
2830    Input Parameters:
2831 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2832 -  icntl - index of MUMPS parameter array RINFO()
2833 
2834   Output Parameter:
2835 .  val - value of MUMPS RINFO(icntl)
2836 
2837    Level: beginner
2838 
2839    References:
2840 .  * - MUMPS Users' Guide
2841 
2842 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2843 @*/
2844 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2845 {
2846   PetscFunctionBegin;
2847   PetscValidType(F,1);
2848   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2849   PetscValidRealPointer(val,3);
2850   PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2851   PetscFunctionReturn(0);
2852 }
2853 
2854 /*@
2855   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2856 
2857    Logically Collective on Mat
2858 
2859    Input Parameters:
2860 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2861 -  icntl - index of MUMPS parameter array RINFOG()
2862 
2863   Output Parameter:
2864 .  val - value of MUMPS RINFOG(icntl)
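
  Example Usage (a sketch; with -mat_mumps_icntl_33 1, MUMPS computes the determinant as RINFOG(12)*2^INFOG(34) in real arithmetic):
$     PetscCall(MatMumpsGetRinfog(F,12,&mantissa));
$     PetscCall(MatMumpsGetInfog(F,34,&exponent)); /* determinant = mantissa*2^exponent */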
2865 
2866    Level: beginner
2867 
2868    References:
2869 .  * - MUMPS Users' Guide
2870 
2871 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2872 @*/
2873 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2874 {
2875   PetscFunctionBegin;
2876   PetscValidType(F,1);
2877   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2878   PetscValidRealPointer(val,3);
2879   PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2880   PetscFunctionReturn(0);
2881 }
2882 
2883 /*MC
2884   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2885   distributed and sequential matrices via the external package MUMPS.
2886 
2887   Works with MATAIJ and MATSBAIJ matrices
2888 
2889   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2890 
2891   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2892 
2893   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
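
  For example (a sketch; ./ex stands for any PETSc program that solves a linear system with KSP):
$     ./ex -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2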
2894 
2895   Options Database Keys:
2896 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2897 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2898 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2899 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2900 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
2901 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2902                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2903 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2904 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of iterative refinements
2905 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2906 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2907 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2908 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2909 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2910 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2911 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2912 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2913 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
2914 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2915 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2916 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
2917 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2918 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2919 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2920 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2921 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2922 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2923 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2924 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2925 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2926 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2927 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2928 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2929 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2930 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2931 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2932                                    The default is the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual.
2933 
2934   Level: beginner
2935 
2936     Notes:
2937     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error.
2938 
2939     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2940     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
2941 
2942     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2943 $          KSPGetPC(ksp,&pc);
2944 $          PCFactorGetMatrix(pc,&mat);
2945 $          MatMumpsGetInfo(mat,....);
2946 $          MatMumpsGetInfog(mat,....); etc.
2947            Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2948 
2949   Using MUMPS with 64-bit integers
2950     MUMPS provides 64-bit integer support in two build modes:
2951       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2952       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2953 
2954       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2955       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2956       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2957       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2958 
2959     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2960 
2961   Two modes to run MUMPS/PETSc with OpenMP
2962 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2963 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2964 
2965 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2966 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2967 
2968    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2969    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2970    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2971    libraries, to get good performance you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2972    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2973 
2974    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2975    processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of
2976    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the
2977    omp_comm are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves
2978    (which might be set by CPU binding policies in job scripts) and make them available to the master, so that the OMP threads spawned by MUMPS
2979    can run on these CPUs. On a multi-socket compute node, MPI rank mapping becomes an issue. Continuing the above example, suppose your compute
2980    node has two sockets. If MPI ranks are interleaved across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bound to cores,
2981    then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half on
2982    socket 1, which hurts locality. If instead MPI ranks are mapped consecutively onto the two sockets, the problem does not arise. Therefore,
2983    when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2984    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2985    examine the mapping result.
2986 
2987    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2988    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2989    calls omp_set_num_threads(m) internally before calling MUMPS.
2990 
2991    References:
2992 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2993 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2994 
2995 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
2996 
2997 M*/
2998 
2999 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
3000 {
3001   PetscFunctionBegin;
3002   *type = MATSOLVERMUMPS;
3003   PetscFunctionReturn(0);
3004 }
3005 
3006 /* MatGetFactor for Seq and MPI AIJ matrices */
3007 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
3008 {
3009   Mat            B;
3010   Mat_MUMPS      *mumps;
3011   PetscBool      isSeqAIJ;
3012   PetscMPIInt    size;
3013 
3014   PetscFunctionBegin;
3015  #if defined(PETSC_USE_COMPLEX)
3016   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE || ftype != MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3017  #endif
3018   /* Create the factorization matrix */
3019   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ));
3020   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3021   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3022   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3023   PetscCall(MatSetUp(B));
3024 
3025   PetscCall(PetscNewLog(B,&mumps));
3026 
3027   B->ops->view    = MatView_MUMPS;
3028   B->ops->getinfo = MatGetInfo_MUMPS;
3029 
3030   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3031   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3032   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3033   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3034   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3035   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3036   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3037   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3038   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3039   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3040   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3041   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3042   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3043 
3044   if (ftype == MAT_FACTOR_LU) {
3045     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3046     B->factortype            = MAT_FACTOR_LU;
3047     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3048     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3049     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3050     mumps->sym = 0;
3051   } else {
3052     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3053     B->factortype                  = MAT_FACTOR_CHOLESKY;
3054     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3055     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3056     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3057 #if defined(PETSC_USE_COMPLEX)
3058     mumps->sym = 2;
3059 #else
3060     if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3061     else                            mumps->sym = 2;
3062 #endif
3063   }
3064 
3065   /* set solvertype */
3066   PetscCall(PetscFree(B->solvertype));
3067   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3068   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3069   if (size == 1) {
3070     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3071     B->canuseordering = PETSC_TRUE;
3072   }
3073   B->ops->destroy = MatDestroy_MUMPS;
3074   B->data         = (void*)mumps;
3075 
3076   *F = B;
3077   mumps->id.job = JOB_NULL;
3078   mumps->ICNTL_pre = NULL;
3079   mumps->CNTL_pre = NULL;
3080   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3081   PetscFunctionReturn(0);
3082 }
3083 
3084 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3085 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3086 {
3087   Mat            B;
3088   Mat_MUMPS      *mumps;
3089   PetscBool      isSeqSBAIJ;
3090   PetscMPIInt    size;
3091 
3092   PetscFunctionBegin;
3093  #if defined(PETSC_USE_COMPLEX)
3094   PetscCheck(A->hermitian != PETSC_BOOL3_TRUE || A->symmetric == PETSC_BOOL3_TRUE,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3095  #endif
3096   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3097   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3098   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3099   PetscCall(MatSetUp(B));
3100 
3101   PetscCall(PetscNewLog(B,&mumps));
3102   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ));
3103   if (isSeqSBAIJ) {
3104     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3105   } else {
3106     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3107   }
3108 
3109   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3110   B->ops->view                   = MatView_MUMPS;
3111   B->ops->getinfo                = MatGetInfo_MUMPS;
3112 
3113   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3114   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3115   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3116   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3117   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3118   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3119   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3120   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3121   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3122   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3123   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3124   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3125   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3126 
3127   B->factortype = MAT_FACTOR_CHOLESKY;
3128 #if defined(PETSC_USE_COMPLEX)
3129   mumps->sym = 2;
3130 #else
3131   if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1;
3132   else                            mumps->sym = 2;
3133 #endif
3134 
3135   /* set solvertype */
3136   PetscCall(PetscFree(B->solvertype));
3137   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3138   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3139   if (size == 1) {
3140     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3141     B->canuseordering = PETSC_TRUE;
3142   }
3143   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3144   B->ops->destroy = MatDestroy_MUMPS;
3145   B->data         = (void*)mumps;
3146 
3147   *F = B;
3148   mumps->id.job = JOB_NULL;
3149   mumps->ICNTL_pre = NULL;
3150   mumps->CNTL_pre = NULL;
3151   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3152   PetscFunctionReturn(0);
3153 }
3154 
3155 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3156 {
3157   Mat            B;
3158   Mat_MUMPS      *mumps;
3159   PetscBool      isSeqBAIJ;
3160   PetscMPIInt    size;
3161 
3162   PetscFunctionBegin;
3163   /* Create the factorization matrix */
3164   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ));
3165   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3166   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3167   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3168   PetscCall(MatSetUp(B));
3169 
3170   PetscCall(PetscNewLog(B,&mumps));
3171   if (ftype == MAT_FACTOR_LU) {
3172     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3173     B->factortype            = MAT_FACTOR_LU;
3174     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3175     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3176     mumps->sym = 0;
3177     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3178   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3179 
3180   B->ops->view        = MatView_MUMPS;
3181   B->ops->getinfo     = MatGetInfo_MUMPS;
3182 
3183   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3184   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3185   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3186   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3187   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3188   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3189   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3196 
3197   /* set solvertype */
3198   PetscCall(PetscFree(B->solvertype));
3199   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3200   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3201   if (size == 1) {
3202     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3203     B->canuseordering = PETSC_TRUE;
3204   }
3205   B->ops->destroy = MatDestroy_MUMPS;
3206   B->data         = (void*)mumps;
3207 
3208   *F = B;
3209   mumps->id.job = JOB_NULL;
3210   mumps->ICNTL_pre = NULL;
3211   mumps->CNTL_pre = NULL;
3212   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3213   PetscFunctionReturn(0);
3214 }
3215 
3216 /* MatGetFactor for Seq and MPI SELL matrices */
3217 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3218 {
3219   Mat            B;
3220   Mat_MUMPS      *mumps;
3221   PetscBool      isSeqSELL;
3222   PetscMPIInt    size;
3223 
3224   PetscFunctionBegin;
3225   /* Create the factorization matrix */
3226   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL));
3227   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3228   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3229   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3230   PetscCall(MatSetUp(B));
3231 
3232   PetscCall(PetscNewLog(B,&mumps));
3233 
3234   B->ops->view        = MatView_MUMPS;
3235   B->ops->getinfo     = MatGetInfo_MUMPS;
3236 
3237   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3238   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3239   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3240   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3241   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3242   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3243   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3244   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3245   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3246   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3247   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3248 
3249   if (ftype == MAT_FACTOR_LU) {
3250     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3251     B->factortype            = MAT_FACTOR_LU;
3252     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3253     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3254     mumps->sym = 0;
3255     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3256   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3257 
3258   /* set solvertype */
3259   PetscCall(PetscFree(B->solvertype));
3260   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3261   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3262   if (size == 1) {
3263     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3264     B->canuseordering = PETSC_TRUE;
3265   }
3266   B->ops->destroy = MatDestroy_MUMPS;
3267   B->data         = (void*)mumps;
3268 
3269   *F = B;
3270   mumps->id.job = JOB_NULL;
3271   mumps->ICNTL_pre = NULL;
3272   mumps->CNTL_pre = NULL;
3273   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3274   PetscFunctionReturn(0);
3275 }
3276 
3277 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3278 {
3279   PetscFunctionBegin;
3280   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3281   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3282   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3283   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3284   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3285   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3286   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3287   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3288   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3289   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3290   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps));
3291   PetscFunctionReturn(0);
3292 }
3293