xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision c3b5f7ba6bc5ce25a01a67bb37ba5d34b02bbbd7)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
54     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
58     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #endif
61 
62 #define MPIU_MUMPSINT             MPI_INT
63 #define PETSC_MUMPS_INT_MAX       2147483647
64 #define PETSC_MUMPS_INT_MIN       -2147483648
65 
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/column indices or other small integers */
67 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69   PetscFunctionBegin;
70 #if PetscDefined(USE_64BIT_INDICES)
71   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
72 #endif
73   *b = (PetscMUMPSInt)(a);
74   PetscFunctionReturn(0);
75 }
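/* A minimal usage sketch (illustration only, not part of the original source; the variable names are hypothetical):
     PetscMUMPSInt n;
     PetscCall(PetscMUMPSIntCast(A->rmap->N,&n));
   In 64-bit-index builds the cast errors out if the value does not fit in 32 bits; otherwise it is a plain assignment. */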
76 
77 /* Put these utility routines here since they are only used in this file */
78 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
79 {
80   PetscInt       myval;
81   PetscBool      myset;
82   PetscFunctionBegin;
83   /* PetscInt's size should always be >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
84   PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
85   if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
86   if (set) *set = myset;
87   PetscFunctionReturn(0);
88 }
89 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
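/* A hedged example of how the helper above might be used inside a PetscOptionsBegin()/PetscOptionsEnd() block
   (the option name and help text here are illustrative only):
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing","None",mumps->id.ICNTL(4),&mumps->id.ICNTL(4),NULL));
   The macro supplies PetscOptionsObject and the full PetscMUMPSInt range as bounds. */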
90 
91 /* If using PETSc OpenMP support, we only call MUMPS on the master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
92 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
93 #define PetscMUMPS_c(mumps) \
94   do { \
95     if (mumps->use_petsc_omp_support) { \
96       if (mumps->is_omp_master) { \
97         PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
98         MUMPS_c(&mumps->id); \
99         PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
100       } \
101       PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
102       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
103          to each process, so we only Bcast info[1] (an error code) and leave the others alone (they do not \
104          have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.     \
105          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
106       */ \
107       PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
108       PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
109       PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
110     } else { \
111       MUMPS_c(&mumps->id); \
112     } \
113   } while (0)
114 #else
115 #define PetscMUMPS_c(mumps) \
116   do { MUMPS_c(&mumps->id); } while (0)
117 #endif
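/* The calling pattern used throughout this file: select a MUMPS job, invoke the wrapper, then check the global
   error code (sketch of the pattern used below, not additional executable code):
     mumps->id.job = JOB_FACTNUMERIC;
     PetscMUMPS_c(mumps);
     PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
*/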
118 
119 /* declare MumpsScalar */
120 #if defined(PETSC_USE_COMPLEX)
121 #if defined(PETSC_USE_REAL_SINGLE)
122 #define MumpsScalar mumps_complex
123 #else
124 #define MumpsScalar mumps_double_complex
125 #endif
126 #else
127 #define MumpsScalar PetscScalar
128 #endif
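/* Note: a MumpsScalar is assumed to have the same size and memory layout as a PetscScalar for the matching
   precision/complex configuration, which is why the code below casts PetscScalar* buffers to MumpsScalar*
   when handing them to MUMPS. */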
129 
130 /* macros s.t. indices match MUMPS documentation */
131 #define ICNTL(I) icntl[(I)-1]
132 #define CNTL(I) cntl[(I)-1]
133 #define INFOG(I) infog[(I)-1]
134 #define INFO(I) info[(I)-1]
135 #define RINFOG(I) rinfog[(I)-1]
136 #define RINFO(I) rinfo[(I)-1]
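/* Example: with the macros above, mumps->id.ICNTL(24) refers to mumps->id.icntl[23], so the code can use the
   same 1-based numbering as the MUMPS manual:
     mumps->id.ICNTL(24) = 1;   expands to   mumps->id.icntl[24-1] = 1;
*/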
137 
138 typedef struct Mat_MUMPS Mat_MUMPS;
139 struct Mat_MUMPS {
140 #if defined(PETSC_USE_COMPLEX)
141 #if defined(PETSC_USE_REAL_SINGLE)
142   CMUMPS_STRUC_C id;
143 #else
144   ZMUMPS_STRUC_C id;
145 #endif
146 #else
147 #if defined(PETSC_USE_REAL_SINGLE)
148   SMUMPS_STRUC_C id;
149 #else
150   DMUMPS_STRUC_C id;
151 #endif
152 #endif
153 
154   MatStructure   matstruc;
155   PetscMPIInt    myid,petsc_size;
156   PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
157   PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
158   PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
159   PetscMUMPSInt  sym;
160   MPI_Comm       mumps_comm;
161   PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
162   VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
163   PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
164   PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
165 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
166   PetscInt       *rhs_nrow,max_nrhs;
167   PetscMPIInt    *rhs_recvcounts,*rhs_disps;
168   PetscScalar    *rhs_loc,*rhs_recvbuf;
169 #endif
170   Vec            b_seq,x_seq;
171   PetscInt       ninfo,*info;           /* which INFO to display */
172   PetscInt       sizeredrhs;
173   PetscScalar    *schur_sol;
174   PetscInt       schur_sizesol;
175   PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
176   PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
177   PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
178 
179   /* stuff used by petsc/mumps OpenMP support*/
180   PetscBool      use_petsc_omp_support;
181   PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller that guarantees blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
182   MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
183   PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
184   PetscMPIInt    tag,omp_comm_size;
185   PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
186   MPI_Request    *reqs;
187 };
188 
189 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
190    Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
191  */
192 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
193 {
194   PetscInt       nnz=ia[nrow]-1; /* MUMPS uses 1-based indices. We use PetscInt instead of PetscInt64 here since MUMPS only uses PetscMUMPSInt for the rhs */
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_USE_64BIT_INDICES)
198   {
199     PetscInt i;
200     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201       PetscCall(PetscFree(mumps->ia_alloc));
202       PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
203       mumps->cur_ilen = nrow+1;
204     }
205     if (nnz > mumps->cur_jlen) {
206       PetscCall(PetscFree(mumps->ja_alloc));
207       PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
208       mumps->cur_jlen = nnz;
209     }
210     for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
211     for (i=0; i<nnz; i++)    PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
212     *ia_mumps = mumps->ia_alloc;
213     *ja_mumps = mumps->ja_alloc;
214   }
215 #else
216   *ia_mumps = ia;
217   *ja_mumps = ja;
218 #endif
219   PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
220   PetscFunctionReturn(0);
221 }
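/* Typical use (see MatMatSolve_MUMPS below): obtain a 1-based CSR from a sequential matrix and hand it to MUMPS
   as a sparse right-hand side (sketch):
     PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
*/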
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224 {
225   PetscFunctionBegin;
226   PetscCall(PetscFree(mumps->id.listvar_schur));
227   PetscCall(PetscFree(mumps->id.redrhs));
228   PetscCall(PetscFree(mumps->schur_sol));
229   mumps->id.size_schur = 0;
230   mumps->id.schur_lld  = 0;
231   mumps->id.ICNTL(19)  = 0;
232   PetscFunctionReturn(0);
233 }
234 
235 /* solve with rhs in mumps->id.redrhs and return in the same location */
236 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
237 {
238   Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
239   Mat                  S,B,X;
240   MatFactorSchurStatus schurstatus;
241   PetscInt             sizesol;
242 
243   PetscFunctionBegin;
244   PetscCall(MatFactorFactorizeSchurComplement(F));
245   PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
246   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
247   PetscCall(MatSetType(B,((PetscObject)S)->type_name));
248 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
249   PetscCall(MatBindToCPU(B,S->boundtocpu));
250 #endif
251   switch (schurstatus) {
252   case MAT_FACTOR_SCHUR_FACTORED:
253     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
254     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
255 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
256     PetscCall(MatBindToCPU(X,S->boundtocpu));
257 #endif
258     if (!mumps->id.ICNTL(9)) { /* transpose solve */
259       PetscCall(MatMatSolveTranspose(S,B,X));
260     } else {
261       PetscCall(MatMatSolve(S,B,X));
262     }
263     break;
264   case MAT_FACTOR_SCHUR_INVERTED:
265     sizesol = mumps->id.nrhs*mumps->id.size_schur;
266     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
267       PetscCall(PetscFree(mumps->schur_sol));
268       PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
269       mumps->schur_sizesol = sizesol;
270     }
271     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
272     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
274     PetscCall(MatBindToCPU(X,S->boundtocpu));
275 #endif
276     PetscCall(MatProductCreateWithMat(S,B,NULL,X));
277     if (!mumps->id.ICNTL(9)) { /* transpose solve */
278       PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
279     } else {
280       PetscCall(MatProductSetType(X,MATPRODUCT_AB));
281     }
282     PetscCall(MatProductSetFromOptions(X));
283     PetscCall(MatProductSymbolic(X));
284     PetscCall(MatProductNumeric(X));
285 
286     PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
287     break;
288   default:
289     SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
290   }
291   PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
292   PetscCall(MatDestroy(&B));
293   PetscCall(MatDestroy(&X));
294   PetscFunctionReturn(0);
295 }
296 
297 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
298 {
299   Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
300 
301   PetscFunctionBegin;
302   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
303     PetscFunctionReturn(0);
304   }
305   if (!expansion) { /* prepare for the condensation step */
306     PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
307     /* allocate MUMPS internal array to store reduced right-hand sides */
308     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
309       PetscCall(PetscFree(mumps->id.redrhs));
310       mumps->id.lredrhs = mumps->id.size_schur;
311       PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
312       mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
313     }
314     mumps->id.ICNTL(26) = 1; /* condensation phase */
315   } else { /* prepare for the expansion step */
316     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
317     PetscCall(MatMumpsSolveSchur_Private(F));
318     mumps->id.ICNTL(26) = 2; /* expansion phase */
319     PetscMUMPS_c(mumps);
320     PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
321     /* restore defaults */
322     mumps->id.ICNTL(26) = -1;
323     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
324     if (mumps->id.nrhs > 1) {
325       PetscCall(PetscFree(mumps->id.redrhs));
326       mumps->id.lredrhs = 0;
327       mumps->sizeredrhs = 0;
328     }
329   }
330   PetscFunctionReturn(0);
331 }
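/* In summary, a Schur-reduced solve in this file proceeds as follows (sketch of the code paths above and in MatSolve_MUMPS):
     MatMumpsHandleSchur_Private(F,PETSC_FALSE);   condensation: allocate id.redrhs and set ICNTL(26)=1
     ... JOB_SOLVE ...                             MUMPS returns the reduced right-hand side in id.redrhs
     MatMumpsHandleSchur_Private(F,PETSC_TRUE);    solve the Schur system, set ICNTL(26)=2, expand, restore ICNTL(26)=-1
*/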
332 
333 /*
334   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
335 
336   input:
337     A       - matrix in aij, baij or sbaij format
338     shift   - 0: C style output triples; 1: Fortran style output triples.
339     reuse   - MAT_INITIAL_MATRIX: space is allocated and values are set for the triples
340               MAT_REUSE_MATRIX:   only the values in the v array are updated
341   output:
342     nnz     - dim of r, c, and v (number of local nonzero entries of A)
343     r, c, v - row and column indices and matrix values (the triples)
344 
345   The arrays r and c are obtained in a single PetscMalloc2() and freed with PetscFree2(mumps->irn,mumps->jcn) in MatDestroy_MUMPS();
346   v either points directly at the matrix's own data or is allocated separately into mumps->val_alloc and freed with PetscFree(mumps->val_alloc).
347   Because v is only sometimes a separate allocation, the allocations cannot easily be merged into a single PetscMalloc3().
348 
349  */
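/* For example, with shift = 1 (Fortran-style indices) the 2x2 matrix [2 0; 3 4] stored in AIJ format is reported
   as the triples (r,c,v) = (1,1,2), (2,1,3), (2,2,4); with shift = 0 the indices start at 0 instead.
   (Illustration only, not part of the original source.) */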
350 
351 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
352 {
353   const PetscScalar *av;
354   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
355   PetscInt64        nz,rnz,i,j,k;
356   PetscMUMPSInt     *row,*col;
357   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
358 
359   PetscFunctionBegin;
360   PetscCall(MatSeqAIJGetArrayRead(A,&av));
361   mumps->val = (PetscScalar*)av;
362   if (reuse == MAT_INITIAL_MATRIX) {
363     nz   = aa->nz;
364     ai   = aa->i;
365     aj   = aa->j;
366     PetscCall(PetscMalloc2(nz,&row,nz,&col));
367     for (i=k=0; i<M; i++) {
368       rnz = ai[i+1] - ai[i];
369       ajj = aj + ai[i];
370       for (j=0; j<rnz; j++) {
371         PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
372         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
373         k++;
374       }
375     }
376     mumps->irn = row;
377     mumps->jcn = col;
378     mumps->nnz = nz;
379   }
380   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
381   PetscFunctionReturn(0);
382 }
383 
384 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
385 {
386   PetscInt64     nz,i,j,k,r;
387   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
388   PetscMUMPSInt  *row,*col;
389 
390   PetscFunctionBegin;
391   mumps->val = a->val;
392   if (reuse == MAT_INITIAL_MATRIX) {
393     nz   = a->sliidx[a->totalslices];
394     PetscCall(PetscMalloc2(nz,&row,nz,&col));
395     for (i=k=0; i<a->totalslices; i++) {
396       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
397         PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
398       }
399     }
400     for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
401     mumps->irn = row;
402     mumps->jcn = col;
403     mumps->nnz = nz;
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
409 {
410   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
411   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
412   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
413   PetscInt       bs;
414   PetscMUMPSInt  *row,*col;
415 
416   PetscFunctionBegin;
417   PetscCall(MatGetBlockSize(A,&bs));
418   M          = A->rmap->N/bs;
419   mumps->val = aa->a;
420   if (reuse == MAT_INITIAL_MATRIX) {
421     ai   = aa->i; aj = aa->j;
422     nz   = bs2*aa->nz;
423     PetscCall(PetscMalloc2(nz,&row,nz,&col));
424     for (i=0; i<M; i++) {
425       ajj = aj + ai[i];
426       rnz = ai[i+1] - ai[i];
427       for (k=0; k<rnz; k++) {
428         for (j=0; j<bs; j++) {
429           for (m=0; m<bs; m++) {
430             PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
431             PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
432             idx++;
433           }
434         }
435       }
436     }
437     mumps->irn = row;
438     mumps->jcn = col;
439     mumps->nnz = nz;
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
445 {
446   const PetscInt *ai, *aj,*ajj;
447   PetscInt        bs;
448   PetscInt64      nz,rnz,i,j,k,m;
449   PetscMUMPSInt   *row,*col;
450   PetscScalar     *val;
451   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
452   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
453 #if defined(PETSC_USE_COMPLEX)
454   PetscBool       hermitian;
455 #endif
456 
457   PetscFunctionBegin;
458 #if defined(PETSC_USE_COMPLEX)
459   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
460   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
461 #endif
462   ai   = aa->i;
463   aj   = aa->j;
464   PetscCall(MatGetBlockSize(A,&bs));
465   if (reuse == MAT_INITIAL_MATRIX) {
466     nz   = aa->nz;
467     PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
468     if (bs>1) {
469       PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
470       mumps->val = mumps->val_alloc;
471     } else {
472       mumps->val = aa->a;
473     }
474     mumps->irn = row;
475     mumps->jcn = col;
476   } else {
477     if (bs == 1) mumps->val = aa->a;
478     row = mumps->irn;
479     col = mumps->jcn;
480   }
481   val = mumps->val;
482 
483   nz = 0;
484   if (bs>1) {
485     for (i=0; i<mbs; i++) {
486       rnz = ai[i+1] - ai[i];
487       ajj = aj + ai[i];
488       for (j=0; j<rnz; j++) {
489         for (k=0; k<bs; k++) {
490           for (m=0; m<bs; m++) {
491             if (ajj[j]>i || k>=m) {
492               if (reuse == MAT_INITIAL_MATRIX) {
493                 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
494                 PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
495               }
496               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
497             }
498           }
499         }
500       }
501     }
502   } else if (reuse == MAT_INITIAL_MATRIX) {
503     for (i=0; i<mbs; i++) {
504       rnz = ai[i+1] - ai[i];
505       ajj = aj + ai[i];
506       for (j=0; j<rnz; j++) {
507         PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
508         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
509         nz++;
510       }
511     }
512     PetscCheckFalse(nz != aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
513   }
514   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
515   PetscFunctionReturn(0);
516 }
517 
518 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
519 {
520   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
521   PetscInt64        nz,rnz,i,j;
522   const PetscScalar *av,*v1;
523   PetscScalar       *val;
524   PetscMUMPSInt     *row,*col;
525   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
526   PetscBool         missing;
527 #if defined(PETSC_USE_COMPLEX)
528   PetscBool         hermitian;
529 #endif
530 
531   PetscFunctionBegin;
532 #if defined(PETSC_USE_COMPLEX)
533   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
534   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
535 #endif
536   PetscCall(MatSeqAIJGetArrayRead(A,&av));
537   ai    = aa->i; aj = aa->j;
538   adiag = aa->diag;
539   PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
540   if (reuse == MAT_INITIAL_MATRIX) {
541     /* count nz in the upper triangular part of A */
542     nz = 0;
543     if (missing) {
544       for (i=0; i<M; i++) {
545         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
546           for (j=ai[i];j<ai[i+1];j++) {
547             if (aj[j] < i) continue;
548             nz++;
549           }
550         } else {
551           nz += ai[i+1] - adiag[i];
552         }
553       }
554     } else {
555       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
556     }
557     PetscCall(PetscMalloc2(nz,&row,nz,&col));
558     PetscCall(PetscMalloc1(nz,&val));
559     mumps->nnz = nz;
560     mumps->irn = row;
561     mumps->jcn = col;
562     mumps->val = mumps->val_alloc = val;
563 
564     nz = 0;
565     if (missing) {
566       for (i=0; i<M; i++) {
567         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
568           for (j=ai[i];j<ai[i+1];j++) {
569             if (aj[j] < i) continue;
570             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
571             PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
572             val[nz] = av[j];
573             nz++;
574           }
575         } else {
576           rnz = ai[i+1] - adiag[i];
577           ajj = aj + adiag[i];
578           v1  = av + adiag[i];
579           for (j=0; j<rnz; j++) {
580             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
581             PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
582             val[nz++] = v1[j];
583           }
584         }
585       }
586     } else {
587       for (i=0; i<M; i++) {
588         rnz = ai[i+1] - adiag[i];
589         ajj = aj + adiag[i];
590         v1  = av + adiag[i];
591         for (j=0; j<rnz; j++) {
592           PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
593           PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
594           val[nz++] = v1[j];
595         }
596       }
597     }
598   } else {
599     nz = 0;
600     val = mumps->val;
601     if (missing) {
602       for (i=0; i <M; i++) {
603         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
604           for (j=ai[i];j<ai[i+1];j++) {
605             if (aj[j] < i) continue;
606             val[nz++] = av[j];
607           }
608         } else {
609           rnz = ai[i+1] - adiag[i];
610           v1  = av + adiag[i];
611           for (j=0; j<rnz; j++) {
612             val[nz++] = v1[j];
613           }
614         }
615       }
616     } else {
617       for (i=0; i <M; i++) {
618         rnz = ai[i+1] - adiag[i];
619         v1  = av + adiag[i];
620         for (j=0; j<rnz; j++) {
621           val[nz++] = v1[j];
622         }
623       }
624     }
625   }
626   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
627   PetscFunctionReturn(0);
628 }
629 
630 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
631 {
632   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
633   PetscInt          bs;
634   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
635   PetscMUMPSInt     *row,*col;
636   const PetscScalar *av,*bv,*v1,*v2;
637   PetscScalar       *val;
638   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
639   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
640   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
641   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
642 #if defined(PETSC_USE_COMPLEX)
643   PetscBool         hermitian;
644 #endif
645 
646   PetscFunctionBegin;
647 #if defined(PETSC_USE_COMPLEX)
648   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
649   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
650 #endif
651   PetscCall(MatGetBlockSize(A,&bs));
652   rstart = A->rmap->rstart;
653   ai = aa->i;
654   aj = aa->j;
655   bi = bb->i;
656   bj = bb->j;
657   av = aa->a;
658   bv = bb->a;
659 
660   garray = mat->garray;
661 
662   if (reuse == MAT_INITIAL_MATRIX) {
663     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
664     PetscCall(PetscMalloc2(nz,&row,nz,&col));
665     PetscCall(PetscMalloc1(nz,&val));
666     /* cannot determine the exact mumps->nnz yet because with SBAIJ only the upper-triangular entries of each block are extracted below */
667     mumps->irn = row;
668     mumps->jcn = col;
669     mumps->val = mumps->val_alloc = val;
670   } else {
671     val = mumps->val;
672   }
673 
674   jj = 0; irow = rstart;
675   for (i=0; i<mbs; i++) {
676     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
677     countA = ai[i+1] - ai[i];
678     countB = bi[i+1] - bi[i];
679     bjj    = bj + bi[i];
680     v1     = av + ai[i]*bs2;
681     v2     = bv + bi[i]*bs2;
682 
683     if (bs>1) {
684       /* A-part */
685       for (j=0; j<countA; j++) {
686         for (k=0; k<bs; k++) {
687           for (m=0; m<bs; m++) {
688             if (rstart + ajj[j]*bs>irow || k>=m) {
689               if (reuse == MAT_INITIAL_MATRIX) {
690                 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
691                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
692               }
693               val[jj++] = v1[j*bs2 + m + k*bs];
694             }
695           }
696         }
697       }
698 
699       /* B-part */
700       for (j=0; j < countB; j++) {
701         for (k=0; k<bs; k++) {
702           for (m=0; m<bs; m++) {
703             if (reuse == MAT_INITIAL_MATRIX) {
704               PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
705               PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
706             }
707             val[jj++] = v2[j*bs2 + m + k*bs];
708           }
709         }
710       }
711     } else {
712       /* A-part */
713       for (j=0; j<countA; j++) {
714         if (reuse == MAT_INITIAL_MATRIX) {
715           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
716           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
717         }
718         val[jj++] = v1[j];
719       }
720 
721       /* B-part */
722       for (j=0; j < countB; j++) {
723         if (reuse == MAT_INITIAL_MATRIX) {
724           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
725           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
726         }
727         val[jj++] = v2[j];
728       }
729     }
730     irow+=bs;
731   }
732   mumps->nnz = jj;
733   PetscFunctionReturn(0);
734 }
735 
736 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
737 {
738   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
739   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
740   PetscMUMPSInt     *row,*col;
741   const PetscScalar *av, *bv,*v1,*v2;
742   PetscScalar       *val;
743   Mat               Ad,Ao;
744   Mat_SeqAIJ        *aa;
745   Mat_SeqAIJ        *bb;
746 
747   PetscFunctionBegin;
748   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
749   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
750   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
751 
752   aa = (Mat_SeqAIJ*)(Ad)->data;
753   bb = (Mat_SeqAIJ*)(Ao)->data;
754   ai = aa->i;
755   aj = aa->j;
756   bi = bb->i;
757   bj = bb->j;
758 
759   rstart = A->rmap->rstart;
760 
761   if (reuse == MAT_INITIAL_MATRIX) {
762     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
763     PetscCall(PetscMalloc2(nz,&row,nz,&col));
764     PetscCall(PetscMalloc1(nz,&val));
765     mumps->nnz = nz;
766     mumps->irn = row;
767     mumps->jcn = col;
768     mumps->val = mumps->val_alloc = val;
769   } else {
770     val = mumps->val;
771   }
772 
773   jj = 0; irow = rstart;
774   for (i=0; i<m; i++) {
775     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
776     countA = ai[i+1] - ai[i];
777     countB = bi[i+1] - bi[i];
778     bjj    = bj + bi[i];
779     v1     = av + ai[i];
780     v2     = bv + bi[i];
781 
782     /* A-part */
783     for (j=0; j<countA; j++) {
784       if (reuse == MAT_INITIAL_MATRIX) {
785         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
786         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
787       }
788       val[jj++] = v1[j];
789     }
790 
791     /* B-part */
792     for (j=0; j < countB; j++) {
793       if (reuse == MAT_INITIAL_MATRIX) {
794         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
795         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
796       }
797       val[jj++] = v2[j];
798     }
799     irow++;
800   }
801   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
802   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
803   PetscFunctionReturn(0);
804 }
805 
806 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
807 {
808   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
809   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
810   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
811   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
812   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
813   const PetscInt    bs2=mat->bs2;
814   PetscInt          bs;
815   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
816   PetscMUMPSInt     *row,*col;
817   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
818   PetscScalar       *val;
819 
820   PetscFunctionBegin;
821   PetscCall(MatGetBlockSize(A,&bs));
822   if (reuse == MAT_INITIAL_MATRIX) {
823     nz   = bs2*(aa->nz + bb->nz);
824     PetscCall(PetscMalloc2(nz,&row,nz,&col));
825     PetscCall(PetscMalloc1(nz,&val));
826     mumps->nnz = nz;
827     mumps->irn = row;
828     mumps->jcn = col;
829     mumps->val = mumps->val_alloc = val;
830   } else {
831     val = mumps->val;
832   }
833 
834   jj = 0; irow = rstart;
835   for (i=0; i<mbs; i++) {
836     countA = ai[i+1] - ai[i];
837     countB = bi[i+1] - bi[i];
838     ajj    = aj + ai[i];
839     bjj    = bj + bi[i];
840     v1     = av + bs2*ai[i];
841     v2     = bv + bs2*bi[i];
842 
843     idx = 0;
844     /* A-part */
845     for (k=0; k<countA; k++) {
846       for (j=0; j<bs; j++) {
847         for (n=0; n<bs; n++) {
848           if (reuse == MAT_INITIAL_MATRIX) {
849             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
850             PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
851           }
852           val[jj++] = v1[idx++];
853         }
854       }
855     }
856 
857     idx = 0;
858     /* B-part */
859     for (k=0; k<countB; k++) {
860       for (j=0; j<bs; j++) {
861         for (n=0; n<bs; n++) {
862           if (reuse == MAT_INITIAL_MATRIX) {
863             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
864             PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
865           }
866           val[jj++] = v2[idx++];
867         }
868       }
869     }
870     irow += bs;
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
876 {
877   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
878   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
879   PetscMUMPSInt     *row,*col;
880   const PetscScalar *av, *bv,*v1,*v2;
881   PetscScalar       *val;
882   Mat               Ad,Ao;
883   Mat_SeqAIJ        *aa;
884   Mat_SeqAIJ        *bb;
885 #if defined(PETSC_USE_COMPLEX)
886   PetscBool         hermitian;
887 #endif
888 
889   PetscFunctionBegin;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
892   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
893 #endif
894   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
895   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
896   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
897 
898   aa    = (Mat_SeqAIJ*)(Ad)->data;
899   bb    = (Mat_SeqAIJ*)(Ao)->data;
900   ai    = aa->i;
901   aj    = aa->j;
902   adiag = aa->diag;
903   bi    = bb->i;
904   bj    = bb->j;
905 
906   rstart = A->rmap->rstart;
907 
908   if (reuse == MAT_INITIAL_MATRIX) {
909     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
910     nzb = 0;    /* num of upper triangular entries in mat->B */
911     for (i=0; i<m; i++) {
912       nza   += (ai[i+1] - adiag[i]);
913       countB = bi[i+1] - bi[i];
914       bjj    = bj + bi[i];
915       for (j=0; j<countB; j++) {
916         if (garray[bjj[j]] > rstart) nzb++;
917       }
918     }
919 
920     nz   = nza + nzb; /* total nz of upper triangular part of mat */
921     PetscCall(PetscMalloc2(nz,&row,nz,&col));
922     PetscCall(PetscMalloc1(nz,&val));
923     mumps->nnz = nz;
924     mumps->irn = row;
925     mumps->jcn = col;
926     mumps->val = mumps->val_alloc = val;
927   } else {
928     val = mumps->val;
929   }
930 
931   jj = 0; irow = rstart;
932   for (i=0; i<m; i++) {
933     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
934     v1     = av + adiag[i];
935     countA = ai[i+1] - adiag[i];
936     countB = bi[i+1] - bi[i];
937     bjj    = bj + bi[i];
938     v2     = bv + bi[i];
939 
940     /* A-part */
941     for (j=0; j<countA; j++) {
942       if (reuse == MAT_INITIAL_MATRIX) {
943         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
944         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
945       }
946       val[jj++] = v1[j];
947     }
948 
949     /* B-part */
950     for (j=0; j < countB; j++) {
951       if (garray[bjj[j]] > rstart) {
952         if (reuse == MAT_INITIAL_MATRIX) {
953           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
954           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
955         }
956         val[jj++] = v2[j];
957       }
958     }
959     irow++;
960   }
961   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
962   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatDestroy_MUMPS(Mat A)
967 {
968   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
969 
970   PetscFunctionBegin;
971   PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
972   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
973   PetscCall(VecScatterDestroy(&mumps->scat_sol));
974   PetscCall(VecDestroy(&mumps->b_seq));
975   PetscCall(VecDestroy(&mumps->x_seq));
976   PetscCall(PetscFree(mumps->id.perm_in));
977   PetscCall(PetscFree2(mumps->irn,mumps->jcn));
978   PetscCall(PetscFree(mumps->val_alloc));
979   PetscCall(PetscFree(mumps->info));
980   PetscCall(MatMumpsResetSchur_Private(mumps));
981   mumps->id.job = JOB_END;
982   PetscMUMPS_c(mumps);
983   PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
984 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
985   if (mumps->use_petsc_omp_support) {
986     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
987     PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
988     PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
989   }
990 #endif
991   PetscCall(PetscFree(mumps->ia_alloc));
992   PetscCall(PetscFree(mumps->ja_alloc));
993   PetscCall(PetscFree(mumps->recvcount));
994   PetscCall(PetscFree(mumps->reqs));
995   PetscCall(PetscFree(mumps->irhs_loc));
996   if (mumps->mumps_comm != MPI_COMM_NULL) PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
997   PetscCall(PetscFree(A->data));
998 
999   /* clear composed functions */
1000   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1001   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
1002   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
1003   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
1004   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
1005   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
1006   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
1007   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
1008   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
1009   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
1010   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
1011   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
1012   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1017 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1018 {
1019   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1020   const PetscMPIInt  ompsize=mumps->omp_comm_size;
1021   PetscInt           i,m,M,rstart;
1022 
1023   PetscFunctionBegin;
1024   PetscCall(MatGetSize(A,&M,NULL));
1025   PetscCall(MatGetLocalSize(A,&m,NULL));
1026   PetscCheckFalse(M > PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1027   if (ompsize == 1) {
1028     if (!mumps->irhs_loc) {
1029       mumps->nloc_rhs = m;
1030       PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
1031       PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1032       for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1033     }
1034     mumps->id.rhs_loc = (MumpsScalar*)array;
1035   } else {
1036   #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1037     const PetscInt  *ranges;
1038     PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
1039     MPI_Group       petsc_group,omp_group;
1040     PetscScalar     *recvbuf=NULL;
1041 
1042     if (mumps->is_omp_master) {
1043       /* Lazily initialize the omp stuff for distributed rhs */
1044       if (!mumps->irhs_loc) {
1045         PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
1046         PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
1047         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
1048         PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
1049         for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1050         PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));
1051 
1052         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1053         mumps->nloc_rhs = 0;
1054         PetscCall(MatGetOwnershipRanges(A,&ranges));
1055         for (j=0; j<ompsize; j++) {
1056           mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1057           mumps->nloc_rhs   += mumps->rhs_nrow[j];
1058         }
1059         PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
1060         for (j=k=0; j<ompsize; j++) {
1061           for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1062         }
1063 
1064         PetscCall(PetscFree2(omp_ranks,petsc_ranks));
1065         PetscCallMPI(MPI_Group_free(&petsc_group));
1066         PetscCallMPI(MPI_Group_free(&omp_group));
1067       }
1068 
1069       /* Realloc buffers when the current nrhs is bigger than any seen so far */
1070       if (nrhs > mumps->max_nrhs) {
1071         PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
1072         PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
1073         mumps->max_nrhs = nrhs;
1074       }
1075 
1076       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1077       for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
1078       mumps->rhs_disps[0] = 0;
1079       for (j=1; j<ompsize; j++) {
1080         mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1081         PetscCheckFalse(mumps->rhs_disps[j] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1082       }
1083       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1084     }
1085 
1086     PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
1087     PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));
1088 
1089     if (mumps->is_omp_master) {
1090       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1091         PetscScalar *dst,*dstbase = mumps->rhs_loc;
1092         for (j=0; j<ompsize; j++) {
1093           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1094           dst = dstbase;
1095           for (i=0; i<nrhs; i++) {
1096             PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
1097             src += mumps->rhs_nrow[j];
1098             dst += mumps->nloc_rhs;
1099           }
1100           dstbase += mumps->rhs_nrow[j];
1101         }
1102       }
1103       mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1104     }
1105   #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1106   }
1107   mumps->id.nrhs     = nrhs;
1108   mumps->id.nloc_rhs = mumps->nloc_rhs;
1109   mumps->id.lrhs_loc = mumps->nloc_rhs;
1110   mumps->id.irhs_loc = mumps->irhs_loc;
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1115 {
1116   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1117   const PetscScalar  *rarray = NULL;
1118   PetscScalar        *array;
1119   IS                 is_iden,is_petsc;
1120   PetscInt           i;
1121   PetscBool          second_solve = PETSC_FALSE;
1122   static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1123 
1124   PetscFunctionBegin;
1125   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1));
1126   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2));
1127 
1128   if (A->factorerrortype) {
1129     PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1130     PetscCall(VecSetInf(x));
1131     PetscFunctionReturn(0);
1132   }
1133 
1134   mumps->id.nrhs = 1;
1135   if (mumps->petsc_size > 1) {
1136     if (mumps->ICNTL20 == 10) {
1137       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1138       PetscCall(VecGetArrayRead(b,&rarray));
1139       PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
1140     } else {
1141       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1142       PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1143       PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1144       if (!mumps->myid) {
1145         PetscCall(VecGetArray(mumps->b_seq,&array));
1146         mumps->id.rhs = (MumpsScalar*)array;
1147       }
1148     }
1149   } else {  /* petsc_size == 1 */
1150     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1151     PetscCall(VecCopy(b,x));
1152     PetscCall(VecGetArray(x,&array));
1153     mumps->id.rhs = (MumpsScalar*)array;
1154   }
1155 
1156   /*
1157      handle condensation step of Schur complement (if any)
1158      We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
1159      According to the MUMPS (5.0.0) manual, ICNTL(26) is only accessed during the solve phase, so this value is harmless during factorization.
1160      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1161      This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
1162   */
1163   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1164     PetscCheckFalse(mumps->petsc_size > 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1165     second_solve = PETSC_TRUE;
1166     PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1167   }
1168   /* solve phase */
1169   /*-------------*/
1170   mumps->id.job = JOB_SOLVE;
1171   PetscMUMPS_c(mumps);
1172   PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1173 
1174   /* handle expansion step of Schur complement (if any) */
1175   if (second_solve) {
1176     PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1177   }
1178 
1179   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1180     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1181       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
1182       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1183     }
1184     if (!mumps->scat_sol) { /* create scatter scat_sol */
1185       PetscInt *isol2_loc=NULL;
1186       PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
1187       PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
1188       for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1189       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc));  /* to */
1190       PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
1191       PetscCall(ISDestroy(&is_iden));
1192       PetscCall(ISDestroy(&is_petsc));
1193       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1194     }
1195 
1196     PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1197     PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1198   }
1199 
1200   if (mumps->petsc_size > 1) {
1201     if (mumps->ICNTL20 == 10) {
1202       PetscCall(VecRestoreArrayRead(b,&rarray));
1203     } else if (!mumps->myid) {
1204       PetscCall(VecRestoreArray(mumps->b_seq,&array));
1205     }
1206   } else PetscCall(VecRestoreArray(x,&array));
1207 
1208   PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1213 {
1214   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1215 
1216   PetscFunctionBegin;
1217   mumps->id.ICNTL(9) = 0;
1218   PetscCall(MatSolve_MUMPS(A,b,x));
1219   mumps->id.ICNTL(9) = 1;
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1224 {
1225   Mat               Bt = NULL;
1226   PetscBool         denseX,denseB,flg,flgT;
1227   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1228   PetscInt          i,nrhs,M;
1229   PetscScalar       *array;
1230   const PetscScalar *rbray;
1231   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1232   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1233   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1234   IS                is_to,is_from;
1235   PetscInt          k,proc,j,m,myrstart;
1236   const PetscInt    *rstart;
1237   Vec               v_mpi,msol_loc;
1238   VecScatter        scat_sol;
1239   Vec               b_seq;
1240   VecScatter        scat_rhs;
1241   PetscScalar       *aa;
1242   PetscInt          spnr,*ia,*ja;
1243   Mat_MPIAIJ        *b = NULL;
1244 
1245   PetscFunctionBegin;
1246   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
1247   PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1248 
1249   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
1250   if (denseB) {
1251     PetscCheckFalse(B->rmap->n != X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1252     mumps->id.ICNTL(20)= 0; /* dense RHS */
1253   } else { /* sparse B */
1254     PetscCheckFalse(X == B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1255     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
1256     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1257                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1258       PetscCall(MatTransposeGetMat(B,&Bt));
1259     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1260     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1261   }
1262 
1263   PetscCall(MatGetSize(B,&M,&nrhs));
1264   mumps->id.nrhs = nrhs;
1265   mumps->id.lrhs = M;
1266   mumps->id.rhs  = NULL;
1267 
1268   if (mumps->petsc_size == 1) {
1269     PetscScalar *aa;
1270     PetscInt    spnr,*ia,*ja;
1271     PetscBool   second_solve = PETSC_FALSE;
1272 
1273     PetscCall(MatDenseGetArray(X,&array));
1274     mumps->id.rhs = (MumpsScalar*)array;
1275 
1276     if (denseB) {
1277       /* copy B to X */
1278       PetscCall(MatDenseGetArrayRead(B,&rbray));
1279       PetscCall(PetscArraycpy(array,rbray,M*nrhs));
1280       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1281     } else { /* sparse B */
1282       PetscCall(MatSeqAIJGetArray(Bt,&aa));
1283       PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1284       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1285       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1286       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1287     }
1288     /* handle condensation step of Schur complement (if any) */
1289     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1290       second_solve = PETSC_TRUE;
1291       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1292     }
1293     /* solve phase */
1294     /*-------------*/
1295     mumps->id.job = JOB_SOLVE;
1296     PetscMUMPS_c(mumps);
1297     PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1298 
1299     /* handle expansion step of Schur complement (if any) */
1300     if (second_solve) {
1301       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1302     }
1303     if (!denseB) { /* sparse B */
1304       PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
1305       PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1306       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1307     }
1308     PetscCall(MatDenseRestoreArray(X,&array));
1309     PetscFunctionReturn(0);
1310   }
1311 
1312   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1313   PetscCheckFalse(mumps->petsc_size > 1 && mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1314 
1315   /* create msol_loc to hold mumps local solution */
1316   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1317   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1318 
1319   lsol_loc  = mumps->id.lsol_loc;
1320   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1321   PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
1322   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1323   mumps->id.isol_loc = isol_loc;
1324 
1325   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));
1326 
1327   if (denseB) {
1328     if (mumps->ICNTL20 == 10) {
1329       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1330       PetscCall(MatDenseGetArrayRead(B,&rbray));
1331       PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
1332       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1333       PetscCall(MatGetLocalSize(B,&m,NULL));
1334       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
1335     } else {
1336       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1337       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created below cannot be done with a simple MPI_Gather, resulting in
1338         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B onto rank
1339         0, then re-arrange B into the desired order there, which is a purely local operation.
1340       */
1341 
1342       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1343       /* wrap dense rhs matrix B into a vector v_mpi */
1344       PetscCall(MatGetLocalSize(B,&m,NULL));
1345       PetscCall(MatDenseGetArray(B,&bray));
1346       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1347       PetscCall(MatDenseRestoreArray(B,&bray));
1348 
1349       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1350       if (!mumps->myid) {
1351         PetscInt *idx;
1352         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1353         PetscCall(PetscMalloc1(nrhs*M,&idx));
1354         PetscCall(MatGetOwnershipRanges(B,&rstart));
1355         k = 0;
1356         for (proc=0; proc<mumps->petsc_size; proc++) {
1357           for (j=0; j<nrhs; j++) {
1358             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1359           }
1360         }
1361 
1362         PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
1363         PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
1364         PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
1365       } else {
1366         PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
1367         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
1368         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
1369       }
1370       PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
1371       PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1372       PetscCall(ISDestroy(&is_to));
1373       PetscCall(ISDestroy(&is_from));
1374       PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1375 
1376       if (!mumps->myid) { /* define rhs on the host */
1377         PetscCall(VecGetArray(b_seq,&bray));
1378         mumps->id.rhs = (MumpsScalar*)bray;
1379         PetscCall(VecRestoreArray(b_seq,&bray));
1380       }
1381     }
1382   } else { /* sparse B */
1383     b = (Mat_MPIAIJ*)Bt->data;
1384 
1385     /* wrap dense X into a vector v_mpi */
1386     PetscCall(MatGetLocalSize(X,&m,NULL));
1387     PetscCall(MatDenseGetArray(X,&bray));
1388     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1389     PetscCall(MatDenseRestoreArray(X,&bray));
1390 
1391     if (!mumps->myid) {
1392       PetscCall(MatSeqAIJGetArray(b->A,&aa));
1393       PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1394       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1395       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1396       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1397     } else {
1398       mumps->id.irhs_ptr    = NULL;
1399       mumps->id.irhs_sparse = NULL;
1400       mumps->id.nz_rhs      = 0;
1401       mumps->id.rhs_sparse  = NULL;
1402     }
1403   }
1404 
1405   /* solve phase */
1406   /*-------------*/
1407   mumps->id.job = JOB_SOLVE;
1408   PetscMUMPS_c(mumps);
1409   PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1410 
1411   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1412   PetscCall(MatDenseGetArray(X,&array));
1413   PetscCall(VecPlaceArray(v_mpi,array));
1414 
1415   /* create scatter scat_sol */
1416   PetscCall(MatGetOwnershipRanges(X,&rstart));
1417   /* build idxx[], the indices used to scatter the mumps solution into petsc X */
1418 
1419   PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
1420   PetscCall(PetscMalloc1(nlsol_loc,&idxx));
1421   for (i=0; i<lsol_loc; i++) {
1422     isol_loc[i] -= 1; /* change Fortran style to C style. sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th rhs */
1423 
1424     for (proc=0; proc<mumps->petsc_size; proc++) {
1425       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1426         myrstart = rstart[proc];
1427         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1428         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1429         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1430         break;
1431       }
1432     }
1433 
1434     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1435   }
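       /* e.g., with 2 ranks owning rows {0,1} and {2} of X and nrhs = 2: a local solution entry with global row
          isol_loc[i] = 2 lives on rank 1 (myrstart = 2, m = 1, k = 0), so iidx = 0 + 2*2 = 4 and its nrhs copies are
          sent to v_mpi indices 4 (column 0) and 4 + 1*1 = 5 (column 1), matching v_mpi's per-rank column-major layout */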
1436   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
1437   PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
1438   PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1439   PetscCall(ISDestroy(&is_from));
1440   PetscCall(ISDestroy(&is_to));
1441   PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1442   PetscCall(MatDenseRestoreArray(X,&array));
1443 
1444   /* free spaces */
1445   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1446   mumps->id.isol_loc = isol_loc_save;
1447 
1448   PetscCall(PetscFree2(sol_loc,isol_loc));
1449   PetscCall(PetscFree(idxx));
1450   PetscCall(VecDestroy(&msol_loc));
1451   PetscCall(VecDestroy(&v_mpi));
1452   if (!denseB) {
1453     if (!mumps->myid) {
1454       b = (Mat_MPIAIJ*)Bt->data;
1455       PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
1456       PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1457       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1458     }
1459   } else {
1460     if (mumps->ICNTL20 == 0) {
1461       PetscCall(VecDestroy(&b_seq));
1462       PetscCall(VecScatterDestroy(&scat_rhs));
1463     }
1464   }
1465   PetscCall(VecScatterDestroy(&scat_sol));
1466   PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1471 {
1472   PetscBool      flg;
1473   Mat            B;
1474 
1475   PetscFunctionBegin;
1476   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
1477   PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");
1478 
1479   /* Create B=Bt^T that uses Bt's data structure */
1480   PetscCall(MatCreateTranspose(Bt,&B));
1481 
1482   PetscCall(MatMatSolve_MUMPS(A,B,X));
1483   PetscCall(MatDestroy(&B));
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #if !defined(PETSC_USE_COMPLEX)
1488 /*
1489   input:
1490    F:        numeric factor
1491   output:
1492    nneg:     total number of negative pivots
1493    nzero:    total number of zero pivots
1494    npos:     (global dimension of F) - nneg - nzero
1495 */
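     /* A hedged usage sketch (options as enforced by the checks below): with a Cholesky factor F computed using
        -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1, the inertia can be queried as
          PetscInt nneg,nzero,npos;
          PetscCall(MatGetInertia(F,&nneg,&nzero,&npos));
        which dispatches to this routine through F->ops->getinertia */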
1496 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1497 {
1498   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1499   PetscMPIInt    size;
1500 
1501   PetscFunctionBegin;
1502   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
1503   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1504   PetscCheckFalse(size > 1 && mumps->id.ICNTL(13) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia",mumps->id.ICNTL(13));
1505 
1506   if (nneg) *nneg = mumps->id.INFOG(12);
1507   if (nzero || npos) {
1508     PetscCheckFalse(mumps->id.ICNTL(24) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1509     if (nzero) *nzero = mumps->id.INFOG(28);
1510     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1511   }
1512   PetscFunctionReturn(0);
1513 }
1514 #endif
1515 
1516 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1517 {
1518   PetscInt       i,nreqs;
1519   PetscMUMPSInt  *irn,*jcn;
1520   PetscMPIInt    count;
1521   PetscInt64     totnnz,remain;
1522   const PetscInt osize=mumps->omp_comm_size;
1523   PetscScalar    *val;
1524 
1525   PetscFunctionBegin;
1526   if (osize > 1) {
1527     if (reuse == MAT_INITIAL_MATRIX) {
1528       /* master first gathers counts of nonzeros to receive */
1529       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
1530       PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));
1531 
1532       /* Then each rank computes its number of sends/recvs */
1533       if (mumps->is_omp_master) {
1534         /* Start from 1 since self communication is not done in MPI */
1535         nreqs = 0;
1536         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1537       } else {
1538         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1539       }
1540       PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1541 
1542       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1543          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz might even
1544          be a prime number > 2^31-1 (so it cannot be packed into a larger MPI datatype), we have to slice the message.
1545          Since omp_comm_size is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1546        */
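           /* e.g., a rank contributing 3*2^30 = 3221225472 nonzeros sends/receives them in two slices below,
              of 2147483647 and 1073741825 entries, each fitting in an int-valued MPI count */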
1547       nreqs = 0; /* counter for actual send/recvs */
1548       if (mumps->is_omp_master) {
1549         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1550         PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn));
1551         PetscCall(PetscMalloc1(totnnz,&val));
1552 
1553         /* Self communication */
1554         PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz));
1555         PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz));
1556         PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz));
1557 
1558         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1559         PetscCall(PetscFree2(mumps->irn,mumps->jcn));
1560         PetscCall(PetscFree(mumps->val_alloc));
1561         mumps->nnz = totnnz;
1562         mumps->irn = irn;
1563         mumps->jcn = jcn;
1564         mumps->val = mumps->val_alloc = val;
1565 
1566         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1567         jcn += mumps->recvcount[0];
1568         val += mumps->recvcount[0];
1569 
1570         /* Remote communication */
1571         for (i=1; i<osize; i++) {
1572           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1573           remain = mumps->recvcount[i] - count;
1574           while (count>0) {
1575             PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1576             PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1577             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1578             irn    += count;
1579             jcn    += count;
1580             val    += count;
1581             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1582             remain -= count;
1583           }
1584         }
1585       } else {
1586         irn    = mumps->irn;
1587         jcn    = mumps->jcn;
1588         val    = mumps->val;
1589         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1590         remain = mumps->nnz - count;
1591         while (count>0) {
1592           PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1593           PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1594           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1595           irn    += count;
1596           jcn    += count;
1597           val    += count;
1598           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1599           remain -= count;
1600         }
1601       }
1602     } else {
1603       nreqs = 0;
1604       if (mumps->is_omp_master) {
1605         val = mumps->val + mumps->recvcount[0];
1606         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1607           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1608           remain = mumps->recvcount[i] - count;
1609           while (count>0) {
1610             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1611             val    += count;
1612             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1613             remain -= count;
1614           }
1615         }
1616       } else {
1617         val    = mumps->val;
1618         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1619         remain = mumps->nnz - count;
1620         while (count>0) {
1621           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1622           val    += count;
1623           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1624           remain -= count;
1625         }
1626       }
1627     }
1628     PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE));
1629     mumps->tag++; /* It is fine for the above sends/recvs to share one MPI tag */
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1635 {
1636   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1637   PetscBool      isMPIAIJ;
1638 
1639   PetscFunctionBegin;
1640   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 means an empty matrix, which is not an error here */
1641     if (mumps->id.INFOG(1) == -6) {
1642       PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1643     }
1644     PetscCall(PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1645     PetscFunctionReturn(0);
1646   }
1647 
1648   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1649   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1650 
1651   /* numerical factorization phase */
1652   /*-------------------------------*/
1653   mumps->id.job = JOB_FACTNUMERIC;
1654   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1655     if (!mumps->myid) {
1656       mumps->id.a = (MumpsScalar*)mumps->val;
1657     }
1658   } else {
1659     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1660   }
1661   PetscMUMPS_c(mumps);
1662   if (mumps->id.INFOG(1) < 0) {
1663     if (A->erroriffailure) {
1664       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1665     } else {
1666       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1667         PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1668         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1669       } else if (mumps->id.INFOG(1) == -13) {
1670         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1671         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1672       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1673         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1674         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1675       } else {
1676         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1677         F->factorerrortype = MAT_FACTOR_OTHER;
1678       }
1679     }
1680   }
1681   PetscCheckFalse(!mumps->myid && mumps->id.ICNTL(16) > 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d",mumps->id.ICNTL(16));
1682 
1683   F->assembled    = PETSC_TRUE;
1684 
1685   if (F->schur) { /* reset Schur status to unfactored */
1686 #if defined(PETSC_HAVE_CUDA)
1687     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1688 #endif
1689     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1690       mumps->id.ICNTL(19) = 2;
1691       PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1692     }
1693     PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1694   }
1695 
1696   /* just to be sure that the ICNTL(19) value returned by a call to MatMumpsGetIcntl is always consistent */
1697   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1698 
1699   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1700   if (mumps->petsc_size > 1) {
1701     PetscInt    lsol_loc;
1702     PetscScalar *sol_loc;
1703 
1704     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1705 
1706     /* distributed solution; Create x_seq=sol_loc for repeated use */
1707     if (mumps->x_seq) {
1708       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1709       PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
1710       PetscCall(VecDestroy(&mumps->x_seq));
1711     }
1712     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1713     PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc));
1714     mumps->id.lsol_loc = lsol_loc;
1715     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1716     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq));
1717   }
1718   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 /* Sets MUMPS options from the options database */
1723 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1724 {
1725   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1726   PetscErrorCode ierr;
1727   PetscMUMPSInt  icntl=0;
1728   PetscInt       info[80],i,ninfo=80;
1729   PetscBool      flg=PETSC_FALSE;
1730 
1731   PetscFunctionBegin;
1732   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");PetscCall(ierr);
1733   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg));
1734   if (flg) mumps->id.ICNTL(1) = icntl;
1735   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg));
1736   if (flg) mumps->id.ICNTL(2) = icntl;
1737   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg));
1738   if (flg) mumps->id.ICNTL(3) = icntl;
1739 
1740   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg));
1741   if (flg) mumps->id.ICNTL(4) = icntl;
1742   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
1743 
1744   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg));
1745   if (flg) mumps->id.ICNTL(6) = icntl;
1746 
1747   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg));
1748   if (flg) {
1749     PetscCheckFalse(icntl == 1 || icntl < 0 || icntl > 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1750     mumps->id.ICNTL(7) = icntl;
1751   }
1752 
1753   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL));
1754   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1755   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL));
1756   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL));
1757   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL));
1758   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL));
1759   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL));
1760   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL));
1761   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data (if any) */
1762     PetscCall(MatDestroy(&F->schur));
1763     PetscCall(MatMumpsResetSchur_Private(mumps));
1764   }
1765 
1766   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1767      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1768      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1769      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1770      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1771      In short, we could not use distributed RHS with MPICH until v4.0b1.
1772    */
1773 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1774   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1775 #else
1776   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1777 #endif
1778   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg));
1779   PetscCheckFalse(flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1780 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1781   PetscCheckFalse(flg && mumps->ICNTL20 == 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1782 #endif
1783   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1784 
1785   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL));
1786   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL));
1787   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
1788   if (mumps->id.ICNTL(24)) {
1789     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1790   }
1791 
1792   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL));
1793   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL));
1794   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL));
1795   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL));
1796   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL));
1797   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1798   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL));
1799   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1800   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL));
1801   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL));
1802   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL));
1803   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL));
1804 
1805   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL));
1806   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL));
1807   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL));
1808   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL));
1809   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL));
1810   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL));
1811 
1812   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1813 
1814   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL));
1815   if (ninfo) {
1816     PetscCheckFalse(ninfo > 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must be <= 80",ninfo);
1817     PetscCall(PetscMalloc1(ninfo,&mumps->info));
1818     mumps->ninfo = ninfo;
1819     for (i=0; i<ninfo; i++) {
1820       PetscCheckFalse(info[i] < 0 || info[i] > 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must be between 1 and 80",info[i]);
1821       mumps->info[i] = info[i];
1822     }
1823   }
1824 
1825   ierr = PetscOptionsEnd();PetscCall(ierr);
1826   PetscFunctionReturn(0);
1827 }
1828 
1829 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1830 {
1831   PetscInt       nthreads=0;
1832 
1833   PetscFunctionBegin;
1834   mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1835   PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size));
1836   PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */
1837 
1838   PetscCall(PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support));
1839   if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1840   PetscCall(PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL));
1841   if (mumps->use_petsc_omp_support) {
1842 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1843     PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl));
1844     PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master));
1845 #else
1846     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
1847 #endif
1848   } else {
1849     mumps->omp_comm      = PETSC_COMM_SELF;
1850     mumps->mumps_comm    = mumps->petsc_comm;
1851     mumps->is_omp_master = PETSC_TRUE;
1852   }
1853   PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size));
1854   mumps->reqs = NULL;
1855   mumps->tag  = 0;
1856 
1857   /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1858   if (mumps->mumps_comm != MPI_COMM_NULL) {
1859     PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
1860   }
1861 
1862   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1863   mumps->id.job = JOB_INIT;
1864   mumps->id.par = 1;  /* host participates in factorization and solve */
1865   mumps->id.sym = mumps->sym;
1866 
1867   PetscMUMPS_c(mumps);
1868   PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1869 
1870   /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1871      For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1872    */
1873   PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm));
1874   PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1875 
1876   mumps->scat_rhs = NULL;
1877   mumps->scat_sol = NULL;
1878 
1879   /* set PETSc-MUMPS default options - override MUMPS default */
1880   mumps->id.ICNTL(3) = 0;
1881   mumps->id.ICNTL(4) = 0;
1882   if (mumps->petsc_size == 1) {
1883     mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1884     mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1885   } else {
1886     mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1887     mumps->id.ICNTL(21) = 1;   /* distributed solution */
1888   }
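       /* Consequence of the defaults above: on one rank the matrix is passed to MUMPS in centralized form
          (id.irn/jcn/a); on several ranks each rank passes its local triples (id.irn_loc/jcn_loc/a_loc) and
          MUMPS returns a distributed solution (id.sol_loc/isol_loc) that the solve routines scatter back
          into PETSc vectors */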
1889 
1890   /* schur */
1891   mumps->id.size_schur    = 0;
1892   mumps->id.listvar_schur = NULL;
1893   mumps->id.schur         = NULL;
1894   mumps->sizeredrhs       = 0;
1895   mumps->schur_sol        = NULL;
1896   mumps->schur_sizesol    = 0;
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1901 {
1902   PetscFunctionBegin;
1903   if (mumps->id.INFOG(1) < 0) {
1904     if (A->erroriffailure) {
1905       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1906     } else {
1907       if (mumps->id.INFOG(1) == -6) {
1908         PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1909         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1910       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1911         PetscCall(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1912         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1913       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 with INFO(2)=0 indicates an empty matrix */
1914         PetscCall(PetscInfo(F,"Empty matrix\n"));
1915       } else {
1916         PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1917         F->factorerrortype = MAT_FACTOR_OTHER;
1918       }
1919     }
1920   }
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1925 {
1926   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1927   Vec            b;
1928   const PetscInt M = A->rmap->N;
1929 
1930   PetscFunctionBegin;
1931   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1932     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1933     PetscFunctionReturn(0);
1934   }
1935 
1936   /* Set MUMPS options from the options database */
1937   PetscCall(PetscSetMUMPSFromOptions(F,A));
1938 
1939   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1940   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
1941 
1942   /* analysis phase */
1943   /*----------------*/
1944   mumps->id.job = JOB_FACTSYMBOLIC;
1945   mumps->id.n   = M;
1946   switch (mumps->id.ICNTL(18)) {
1947   case 0:  /* centralized assembled matrix input */
1948     if (!mumps->myid) {
1949       mumps->id.nnz = mumps->nnz;
1950       mumps->id.irn = mumps->irn;
1951       mumps->id.jcn = mumps->jcn;
1952       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1953       if (r) {
1954         mumps->id.ICNTL(7) = 1;
1955         if (!mumps->myid) {
1956           const PetscInt *idx;
1957           PetscInt       i;
1958 
1959           PetscCall(PetscMalloc1(M,&mumps->id.perm_in));
1960           PetscCall(ISGetIndices(r,&idx));
1961           for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
1962           PetscCall(ISRestoreIndices(r,&idx));
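               /* e.g., a user row ordering IS {2,0,1} becomes perm_in = {3,1,2} (1-based, as MUMPS expects) */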
1963         }
1964       }
1965     }
1966     break;
1967   case 3:  /* distributed assembled matrix input (size>1) */
1968     mumps->id.nnz_loc = mumps->nnz;
1969     mumps->id.irn_loc = mumps->irn;
1970     mumps->id.jcn_loc = mumps->jcn;
1971     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1972     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1973       PetscCall(MatCreateVecs(A,NULL,&b));
1974       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
1975       PetscCall(VecDestroy(&b));
1976     }
1977     break;
1978   }
1979   PetscMUMPS_c(mumps);
1980   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
1981 
1982   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1983   F->ops->solve           = MatSolve_MUMPS;
1984   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
1985   F->ops->matsolve        = MatMatSolve_MUMPS;
1986   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1987 
1988   mumps->matstruc = SAME_NONZERO_PATTERN;
1989   PetscFunctionReturn(0);
1990 }
1991 
1992 /* Note the Petsc r and c permutations are ignored */
1993 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1994 {
1995   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1996   Vec            b;
1997   const PetscInt M = A->rmap->N;
1998 
1999   PetscFunctionBegin;
2000   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2001     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2002     PetscFunctionReturn(0);
2003   }
2004 
2005   /* Set MUMPS options from the options database */
2006   PetscCall(PetscSetMUMPSFromOptions(F,A));
2007 
2008   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2009   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2010 
2011   /* analysis phase */
2012   /*----------------*/
2013   mumps->id.job = JOB_FACTSYMBOLIC;
2014   mumps->id.n   = M;
2015   switch (mumps->id.ICNTL(18)) {
2016   case 0:  /* centralized assembled matrix input */
2017     if (!mumps->myid) {
2018       mumps->id.nnz = mumps->nnz;
2019       mumps->id.irn = mumps->irn;
2020       mumps->id.jcn = mumps->jcn;
2021       if (mumps->id.ICNTL(6)>1) {
2022         mumps->id.a = (MumpsScalar*)mumps->val;
2023       }
2024     }
2025     break;
2026   case 3:  /* distributed assembled matrix input (size>1) */
2027     mumps->id.nnz_loc = mumps->nnz;
2028     mumps->id.irn_loc = mumps->irn;
2029     mumps->id.jcn_loc = mumps->jcn;
2030     if (mumps->id.ICNTL(6)>1) {
2031       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2032     }
2033     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2034       PetscCall(MatCreateVecs(A,NULL,&b));
2035       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2036       PetscCall(VecDestroy(&b));
2037     }
2038     break;
2039   }
2040   PetscMUMPS_c(mumps);
2041   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2042 
2043   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2044   F->ops->solve           = MatSolve_MUMPS;
2045   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2046 
2047   mumps->matstruc = SAME_NONZERO_PATTERN;
2048   PetscFunctionReturn(0);
2049 }
2050 
2051 /* Note the Petsc r permutation and factor info are ignored */
2052 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2053 {
2054   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2055   Vec            b;
2056   const PetscInt M = A->rmap->N;
2057 
2058   PetscFunctionBegin;
2059   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2060     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2061     PetscFunctionReturn(0);
2062   }
2063 
2064   /* Set MUMPS options from the options database */
2065   PetscCall(PetscSetMUMPSFromOptions(F,A));
2066 
2067   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2068   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2069 
2070   /* analysis phase */
2071   /*----------------*/
2072   mumps->id.job = JOB_FACTSYMBOLIC;
2073   mumps->id.n   = M;
2074   switch (mumps->id.ICNTL(18)) {
2075   case 0:  /* centralized assembled matrix input */
2076     if (!mumps->myid) {
2077       mumps->id.nnz = mumps->nnz;
2078       mumps->id.irn = mumps->irn;
2079       mumps->id.jcn = mumps->jcn;
2080       if (mumps->id.ICNTL(6)>1) {
2081         mumps->id.a = (MumpsScalar*)mumps->val;
2082       }
2083     }
2084     break;
2085   case 3:  /* distributed assembled matrix input (size>1) */
2086     mumps->id.nnz_loc = mumps->nnz;
2087     mumps->id.irn_loc = mumps->irn;
2088     mumps->id.jcn_loc = mumps->jcn;
2089     if (mumps->id.ICNTL(6)>1) {
2090       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2091     }
2092     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2093       PetscCall(MatCreateVecs(A,NULL,&b));
2094       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2095       PetscCall(VecDestroy(&b));
2096     }
2097     break;
2098   }
2099   PetscMUMPS_c(mumps);
2100   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2101 
2102   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2103   F->ops->solve                 = MatSolve_MUMPS;
2104   F->ops->solvetranspose        = MatSolve_MUMPS;
2105   F->ops->matsolve              = MatMatSolve_MUMPS;
2106   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2107 #if defined(PETSC_USE_COMPLEX)
2108   F->ops->getinertia = NULL;
2109 #else
2110   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2111 #endif
2112 
2113   mumps->matstruc = SAME_NONZERO_PATTERN;
2114   PetscFunctionReturn(0);
2115 }
2116 
2117 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2118 {
2119   PetscBool         iascii;
2120   PetscViewerFormat format;
2121   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2122 
2123   PetscFunctionBegin;
2124   /* check if matrix is mumps type */
2125   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2126 
2127   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
2128   if (iascii) {
2129     PetscCall(PetscViewerGetFormat(viewer,&format));
2130     if (format == PETSC_VIEWER_ASCII_INFO) {
2131       PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n"));
2132       PetscCall(PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym));
2133       PetscCall(PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par));
2134       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1)));
2135       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2)));
2136       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3)));
2137       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4)));
2138       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5)));
2139       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6)));
2140       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7)));
2141       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8)));
2142       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10)));
2143       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11)));
2144       if (mumps->id.ICNTL(11)>0) {
2145         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4)));
2146         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5)));
2147         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6)));
2148         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8)));
2149         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9)));
2150         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11)));
2151       }
2152       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12)));
2153       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13)));
2154       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14)));
2155       /* ICNTL(15-17) not used */
2156       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18)));
2157       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19)));
2158       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20)));
2159       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21)));
2160       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22)));
2161       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23)));
2162 
2163       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24)));
2164       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25)));
2165       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26)));
2166       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27)));
2167       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28)));
2168       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29)));
2169 
2170       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30)));
2171       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors is discarded in the solve phase):    %d\n",mumps->id.ICNTL(31)));
2172       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33)));
2173       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35)));
2174       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36)));
2175       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38)));
2176 
2177       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1)));
2178       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2)));
2179       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3)));
2180       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4)));
2181       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5)));
2182       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7)));
2183 
2184       /* information local to each processor */
2185       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n"));
2186       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2187       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1)));
2188       PetscCall(PetscViewerFlush(viewer));
2189       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n"));
2190       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2)));
2191       PetscCall(PetscViewerFlush(viewer));
2192       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n"));
2193       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3)));
2194       PetscCall(PetscViewerFlush(viewer));
2195 
2196       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n"));
2197       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d\n",mumps->myid,mumps->id.INFO(15)));
2198       PetscCall(PetscViewerFlush(viewer));
2199 
2200       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n"));
2201       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16)));
2202       PetscCall(PetscViewerFlush(viewer));
2203 
2204       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n"));
2205       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23)));
2206       PetscCall(PetscViewerFlush(viewer));
2207 
2208       if (mumps->ninfo && mumps->ninfo <= 80) {
2209         PetscInt i;
2210         for (i=0; i<mumps->ninfo; i++) {
2211           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]));
2212           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i])));
2213           PetscCall(PetscViewerFlush(viewer));
2214         }
2215       }
2216       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2217 
2218       if (!mumps->myid) { /* information from the host */
2219         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1)));
2220         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2)));
2221         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3)));
2222         PetscCall(PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34)));
2223 
2224         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3)));
2225         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4)));
2226         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5)));
2227         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6)));
2228         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7)));
2229         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8)));
2230         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9)));
2231         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10)));
2232         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11)));
2233         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12)));
2234         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13)));
2235         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14)));
2236         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15)));
2237         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16)));
2238         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17)));
2239         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18)));
2240         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19)));
2241         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20)));
2242         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21)));
2243         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22)));
2244         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23)));
2245         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24)));
2246         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25)));
2247         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28)));
2248         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29)));
2249         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31)));
2250         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32)));
2251         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33)));
2252         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34)));
2253         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35)));
2254         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36)));
2255         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37)));
2256         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38)));
2257         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39)));
2258       }
2259     }
2260   }
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2265 {
2266   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2267 
2268   PetscFunctionBegin;
2269   info->block_size        = 1.0;
2270   info->nz_allocated      = mumps->id.INFOG(20);
2271   info->nz_used           = mumps->id.INFOG(20);
2272   info->nz_unneeded       = 0.0;
2273   info->assemblies        = 0.0;
2274   info->mallocs           = 0.0;
2275   info->memory            = 0.0;
2276   info->fill_ratio_given  = 0;
2277   info->fill_ratio_needed = 0;
2278   info->factor_mallocs    = 0;
2279   PetscFunctionReturn(0);
2280 }
2281 
2282 /* -------------------------------------------------------------------------------------------*/
2283 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2284 {
2285   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2286   const PetscScalar *arr;
2287   const PetscInt    *idxs;
2288   PetscInt          size,i;
2289 
2290   PetscFunctionBegin;
2291   PetscCall(ISGetLocalSize(is,&size));
2292   if (mumps->petsc_size > 1) {
2293     PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2294 
2295     ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2296     PetscCallMPI(MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm));
2297     PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
2298   }
2299 
2300   /* Schur complement matrix */
2301   PetscCall(MatDestroy(&F->schur));
2302   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur));
2303   PetscCall(MatDenseGetArrayRead(F->schur,&arr));
2304   mumps->id.schur      = (MumpsScalar*)arr;
2305   mumps->id.size_schur = size;
2306   mumps->id.schur_lld  = size;
2307   PetscCall(MatDenseRestoreArrayRead(F->schur,&arr));
2308   if (mumps->sym == 1) {
2309     PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE));
2310   }
2311 
2312   /* MUMPS expects Fortran style indices */
2313   PetscCall(PetscFree(mumps->id.listvar_schur));
2314   PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur));
2315   PetscCall(ISGetIndices(is,&idxs));
2316   for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i])));
2317   PetscCall(ISRestoreIndices(is,&idxs));
2318   if (mumps->petsc_size > 1) {
2319     mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2320   } else {
2321     if (F->factortype == MAT_FACTOR_LU) {
2322       mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2323     } else {
2324       mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2325     }
2326   }
2327   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2328   mumps->id.ICNTL(26) = -1;
2329   PetscFunctionReturn(0);
2330 }
2331 
2332 /* -------------------------------------------------------------------------------------------*/
2333 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2334 {
2335   Mat            St;
2336   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2337   PetscScalar    *array;
2338 #if defined(PETSC_USE_COMPLEX)
2339   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2340 #endif
2341 
2342   PetscFunctionBegin;
2343   PetscCheckFalse(!mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2344   PetscCall(MatCreate(PETSC_COMM_SELF,&St));
2345   PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur));
2346   PetscCall(MatSetType(St,MATDENSE));
2347   PetscCall(MatSetUp(St));
2348   PetscCall(MatDenseGetArray(St,&array));
2349   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2350     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2351       PetscInt i,j,N=mumps->id.size_schur;
2352       for (i=0;i<N;i++) {
2353         for (j=0;j<N;j++) {
2354 #if !defined(PETSC_USE_COMPLEX)
2355           PetscScalar val = mumps->id.schur[i*N+j];
2356 #else
2357           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2358 #endif
2359           array[j*N+i] = val;
2360         }
2361       }
2362     } else { /* stored by columns */
2363       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2364     }
2365   } else { /* either full or lower-triangular (not packed) */
2366     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2367       PetscInt i,j,N=mumps->id.size_schur;
2368       for (i=0;i<N;i++) {
2369         for (j=i;j<N;j++) {
2370 #if !defined(PETSC_USE_COMPLEX)
2371           PetscScalar val = mumps->id.schur[i*N+j];
2372 #else
2373           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2374 #endif
2375           array[i*N+j] = val;
2376           array[j*N+i] = val;
2377         }
2378       }
2379     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2380       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2381     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2382       PetscInt i,j,N=mumps->id.size_schur;
2383       for (i=0;i<N;i++) {
2384         for (j=0;j<i+1;j++) {
2385 #if !defined(PETSC_USE_COMPLEX)
2386           PetscScalar val = mumps->id.schur[i*N+j];
2387 #else
2388           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2389 #endif
2390           array[i*N+j] = val;
2391           array[j*N+i] = val;
2392         }
2393       }
2394     }
2395   }
2396   PetscCall(MatDenseRestoreArray(St,&array));
2397   *S   = St;
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 /* -------------------------------------------------------------------------------------------*/
2402 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2403 {
2404   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2405 
2406   PetscFunctionBegin;
2407   PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl)));
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2412 {
2413   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2414 
2415   PetscFunctionBegin;
2416   *ival = mumps->id.ICNTL(icntl);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 /*@
2421   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2422 
2423    Logically Collective on Mat
2424 
2425    Input Parameters:
2426 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2427 .  icntl - index of MUMPS parameter array ICNTL()
2428 -  ival - value of MUMPS ICNTL(icntl)
2429 
2430   Options Database:
2431 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2432 
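   Example Usage:
   A minimal sketch of setting ICNTL() programmatically rather than through the options database; it assumes ksp is a KSP whose
   PC has already been set to PCLU (or PCCHOLESKY), and the names pc and F below are illustrative:
$     PC  pc;
$     Mat F;
$     PetscCall(KSPGetPC(ksp,&pc));
$     PetscCall(PCFactorSetMatSolverType(pc,MATSOLVERMUMPS));
$     PetscCall(PCFactorSetUpMatSolverType(pc)); /* create the MUMPS factor matrix F */
$     PetscCall(PCFactorGetMatrix(pc,&F));
$     PetscCall(MatMumpsSetIcntl(F,24,1));       /* turn on detection of null pivot rows, ICNTL(24) */
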
2433    Level: beginner
2434 
2435    References:
2436 .  * - MUMPS Users' Guide
2437 
2438 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2439 @*/
2440 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2441 {
2442   PetscFunctionBegin;
2443   PetscValidType(F,1);
2444   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2445   PetscValidLogicalCollectiveInt(F,icntl,2);
2446   PetscValidLogicalCollectiveInt(F,ival,3);
2447   PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2448   PetscFunctionReturn(0);
2449 }
2450 
2451 /*@
2452   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2453 
2454    Logically Collective on Mat
2455 
2456    Input Parameters:
2457 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2458 -  icntl - index of MUMPS parameter array ICNTL()
2459 
2460   Output Parameter:
2461 .  ival - value of MUMPS ICNTL(icntl)
2462 
2463    Level: beginner
2464 
2465    References:
2466 .  * - MUMPS Users' Guide
2467 
2468 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2469 @*/
2470 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2471 {
2472   PetscFunctionBegin;
2473   PetscValidType(F,1);
2474   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2475   PetscValidLogicalCollectiveInt(F,icntl,2);
2476   PetscValidIntPointer(ival,3);
2477   PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 /* -------------------------------------------------------------------------------------------*/
2482 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2483 {
2484   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2485 
2486   PetscFunctionBegin;
2487   mumps->id.CNTL(icntl) = val;
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2492 {
2493   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2494 
2495   PetscFunctionBegin;
2496   *val = mumps->id.CNTL(icntl);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 /*@
2501   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2502 
2503    Logically Collective on Mat
2504 
2505    Input Parameters:
2506 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2507 .  icntl - index of MUMPS parameter array CNTL()
2508 -  val - value of MUMPS CNTL(icntl)
2509 
2510   Options Database:
2511 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2512 
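   Example Usage:
   A minimal sketch; F is assumed to be the MUMPS factor matrix obtained with MatGetFactor() or PCFactorGetMatrix() as in MatMumpsSetIcntl():
$     PetscCall(MatMumpsSetCntl(F,1,0.05)); /* set the relative pivoting threshold CNTL(1) */
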
2513    Level: beginner
2514 
2515    References:
2516 .  * - MUMPS Users' Guide
2517 
2518 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2519 @*/
2520 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2521 {
2522   PetscFunctionBegin;
2523   PetscValidType(F,1);
2524   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2525   PetscValidLogicalCollectiveInt(F,icntl,2);
2526   PetscValidLogicalCollectiveReal(F,val,3);
2527   PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 /*@
2532   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2533 
2534    Logically Collective on Mat
2535 
2536    Input Parameters:
2537 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2538 -  icntl - index of MUMPS parameter array CNTL()
2539 
2540   Output Parameter:
2541 .  val - value of MUMPS CNTL(icntl)
2542 
2543    Level: beginner
2544 
2545    References:
2546 .  * - MUMPS Users' Guide
2547 
2548 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2549 @*/
2550 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2551 {
2552   PetscFunctionBegin;
2553   PetscValidType(F,1);
2554   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2555   PetscValidLogicalCollectiveInt(F,icntl,2);
2556   PetscValidRealPointer(val,3);
2557   PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2562 {
2563   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2564 
2565   PetscFunctionBegin;
2566   *info = mumps->id.INFO(icntl);
2567   PetscFunctionReturn(0);
2568 }
2569 
2570 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2571 {
2572   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2573 
2574   PetscFunctionBegin;
2575   *infog = mumps->id.INFOG(icntl);
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2580 {
2581   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2582 
2583   PetscFunctionBegin;
2584   *rinfo = mumps->id.RINFO(icntl);
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2589 {
2590   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2591 
2592   PetscFunctionBegin;
2593   *rinfog = mumps->id.RINFOG(icntl);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2598 {
2599   Mat            Bt = NULL,Btseq = NULL;
2600   PetscBool      flg;
2601   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2602   PetscScalar    *aa;
2603   PetscInt       spnr,*ia,*ja,M,nrhs;
2604 
2605   PetscFunctionBegin;
2606   PetscValidPointer(spRHS,2);
2607   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg));
2608   if (flg) {
2609     PetscCall(MatTransposeGetMat(spRHS,&Bt));
2610   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2611 
2612   PetscCall(MatMumpsSetIcntl(F,30,1));
2613 
2614   if (mumps->petsc_size > 1) {
2615     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2616     Btseq = b->A;
2617   } else {
2618     Btseq = Bt;
2619   }
2620 
2621   PetscCall(MatGetSize(spRHS,&M,&nrhs));
2622   mumps->id.nrhs = nrhs;
2623   mumps->id.lrhs = M;
2624   mumps->id.rhs  = NULL;
2625 
2626   if (!mumps->myid) {
2627     PetscCall(MatSeqAIJGetArray(Btseq,&aa));
2628     PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2629     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2630     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
2631     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2632   } else {
2633     mumps->id.irhs_ptr    = NULL;
2634     mumps->id.irhs_sparse = NULL;
2635     mumps->id.nz_rhs      = 0;
2636     mumps->id.rhs_sparse  = NULL;
2637   }
2638   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2639   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2640 
2641   /* solve phase */
2642   /*-------------*/
2643   mumps->id.job = JOB_SOLVE;
2644   PetscMUMPS_c(mumps);
2645   if (mumps->id.INFOG(1) < 0)
2646     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2647 
2648   if (!mumps->myid) {
2649     PetscCall(MatSeqAIJRestoreArray(Btseq,&aa));
2650     PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2651     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2652   }
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 /*@
2657   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2658 
2659    Logically Collective on Mat
2660 
2661    Input Parameters:
2662 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2663 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the specified indices on processor[0]
2664 
2665   Output Parameter:
2666 . spRHS - requested entries of inverse of A
2667 
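   Example Usage:
   A minimal sketch; it assumes F is a MUMPS factor of A and spRHST is a sequential MATAIJ matrix stored on processor 0 whose
   nonzero pattern selects (in transposed layout) the entries of inv(A) to be computed:
$     Mat spRHS;
$     PetscCall(MatCreateTranspose(spRHST,&spRHS)); /* wrap spRHST as a MATTRANSPOSEMAT; no data is copied */
$     PetscCall(MatMumpsGetInverse(F,spRHS));       /* the requested entries of inv(A) are written into spRHST */
$     PetscCall(MatDestroy(&spRHS));
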
2668    Level: beginner
2669 
2670    References:
2671 .  * - MUMPS Users' Guide
2672 
2673 .seealso: MatGetFactor(), MatCreateTranspose()
2674 @*/
2675 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2676 {
2677   PetscFunctionBegin;
2678   PetscValidType(F,1);
2679   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2680   PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2681   PetscFunctionReturn(0);
2682 }
2683 
2684 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2685 {
2686   Mat            spRHS;
2687 
2688   PetscFunctionBegin;
2689   PetscCall(MatCreateTranspose(spRHST,&spRHS));
2690   PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS));
2691   PetscCall(MatDestroy(&spRHS));
2692   PetscFunctionReturn(0);
2693 }
2694 
2695 /*@
2696   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2697 
2698    Logically Collective on Mat
2699 
2700    Input Parameters:
2701 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2702 -  spRHST - sequential sparse matrix in MATAIJ format holding the specified indices of A^T on processor[0]
2703 
2704   Output Parameter:
2705 . spRHST - requested entries of inverse of A^T
2706 
2707    Level: beginner
2708 
2709    References:
2710 .  * - MUMPS Users' Guide
2711 
2712 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2713 @*/
2714 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2715 {
2716   PetscBool      flg;
2717 
2718   PetscFunctionBegin;
2719   PetscValidType(F,1);
2720   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2721   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
2722   PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2723 
2724   PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2725   PetscFunctionReturn(0);
2726 }
2727 
2728 /*@
2729   MatMumpsGetInfo - Get MUMPS parameter INFO()
2730 
2731    Logically Collective on Mat
2732 
2733    Input Parameters:
2734 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2735 -  icntl - index of MUMPS parameter array INFO()
2736 
2737   Output Parameter:
2738 .  ival - value of MUMPS INFO(icntl)
2739 
2740    Level: beginner
2741 
2742    References:
2743 .  * - MUMPS Users' Guide
2744 
2745 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2746 @*/
2747 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2748 {
2749   PetscFunctionBegin;
2750   PetscValidType(F,1);
2751   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2752   PetscValidIntPointer(ival,3);
2753   PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2754   PetscFunctionReturn(0);
2755 }
2756 
2757 /*@
2758   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2759 
2760    Logically Collective on Mat
2761 
2762    Input Parameters:
2763 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2764 -  icntl - index of MUMPS parameter array INFOG()
2765 
2766   Output Parameter:
2767 .  ival - value of MUMPS INFOG(icntl)
2768 
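   Example Usage:
   A minimal sketch of checking the global MUMPS status after a factorization or solve; F is assumed to be the MUMPS factor matrix:
$     PetscInt infog1;
$     PetscCall(MatMumpsGetInfog(F,1,&infog1));
$     if (infog1 < 0) { /* a negative INFOG(1) indicates a MUMPS error; INFOG(2) carries additional information */ }
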
2769    Level: beginner
2770 
2771    References:
2772 .  * - MUMPS Users' Guide
2773 
2774 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2775 @*/
2776 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2777 {
2778   PetscFunctionBegin;
2779   PetscValidType(F,1);
2780   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2781   PetscValidIntPointer(ival,3);
2782   PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 /*@
2787   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2788 
2789    Logically Collective on Mat
2790 
2791    Input Parameters:
2792 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2793 -  icntl - index of MUMPS parameter array RINFO()
2794 
2795   Output Parameter:
2796 .  val - value of MUMPS RINFO(icntl)
2797 
2798    Level: beginner
2799 
2800    References:
2801 .  * - MUMPS Users' Guide
2802 
2803 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2804 @*/
2805 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2806 {
2807   PetscFunctionBegin;
2808   PetscValidType(F,1);
2809   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2810   PetscValidRealPointer(val,3);
2811   PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2812   PetscFunctionReturn(0);
2813 }
2814 
2815 /*@
2816   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2817 
2818    Logically Collective on Mat
2819 
2820    Input Parameters:
2821 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2822 -  icntl - index of MUMPS parameter array RINFOG()
2823 
2824   Output Parameter:
2825 .  val - value of MUMPS RINFOG(icntl)
2826 
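   Example Usage:
   A minimal sketch of retrieving the determinant; it assumes the determinant was requested with -mat_mumps_icntl_33 1 (or
   MatMumpsSetIcntl(F,33,1)) before factoring F; the determinant is then (RINFOG(12) + i*RINFOG(13)) * 2^INFOG(34):
$     PetscReal rdet,idet;
$     PetscInt  e;
$     PetscCall(MatMumpsGetRinfog(F,12,&rdet)); /* real part of the determinant mantissa */
$     PetscCall(MatMumpsGetRinfog(F,13,&idet)); /* imaginary part (complex arithmetic only) */
$     PetscCall(MatMumpsGetInfog(F,34,&e));     /* exponent of the determinant */
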
2827    Level: beginner
2828 
2829    References:
2830 .  * - MUMPS Users' Guide
2831 
2832 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2833 @*/
2834 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2835 {
2836   PetscFunctionBegin;
2837   PetscValidType(F,1);
2838   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2839   PetscValidRealPointer(val,3);
2840   PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2841   PetscFunctionReturn(0);
2842 }
2843 
2844 /*MC
2845   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2846   distributed and sequential matrices via the external package MUMPS.
2847 
2848   Works with MATAIJ and MATSBAIJ matrices
2849 
2850   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2851 
2852   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2853 
2854   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2855 
2856   Options Database Keys:
2857 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2858 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning messages
2859 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2860 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2861 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
2862 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2863                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2864 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2865 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2866 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2867 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2868 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2869 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2870 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2871 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2872 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2873 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2874 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2875 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2876 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2877 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2878 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2879 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2880 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2881 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2882 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2883 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2884 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2885 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2886 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2887 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2888 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2889 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2890 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2891 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2892                                    By default m is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2893 
2894   Level: beginner
2895 
2896     Notes:
2897     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix results in an error.
2898 
2899     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2900 $          KSPGetPC(ksp,&pc);
2901 $          PCFactorGetMatrix(pc,&mat);
2902 $          MatMumpsGetInfo(mat,....);
2903 $          MatMumpsGetInfog(mat,....); etc.
2904            Or run with -ksp_error_if_not_converged, in which case the program stops and the MUMPS information is printed in the error message.
2905 
2906   Using MUMPS with 64-bit integers
2907     MUMPS provides 64-bit integer support in two build modes:
2908       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2909       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2910 
2911       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2912       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and
2913       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2914       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2915 
2916     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2917 
2918   Two modes to run MUMPS/PETSc with OpenMP
2919 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2920 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2921 
2922 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2923 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2924 
2925    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2926    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2927    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2928    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2929    (PETSc will automatically try to use a threaded BLAS if --with-openmp is provided).
2930 
2931    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2932    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2933    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2934    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
2935    by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on those CPUs.
2936    On a multi-socket compute node, MPI rank mapping matters. Continuing the example above, suppose your compute node has two sockets.
2937    If you interleave MPI ranks across the two sockets, that is, even ranks are placed on socket 0 and odd ranks on socket 1, and bind
2938    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of its cores on socket 0
2939    and half on socket 1, which hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the problem does not arise.
2940    Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2941    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2942    examine the mapping result.
2943 
2944    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2945    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2946    calls omp_set_num_threads(m) internally before calling MUMPS.
2947 
2948    References:
2949 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2950 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2951 
2952 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()
2953 
2954 M*/
2955 
2956 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2957 {
2958   PetscFunctionBegin;
2959   *type = MATSOLVERMUMPS;
2960   PetscFunctionReturn(0);
2961 }
2962 
2963 /* MatGetFactor for Seq and MPI AIJ matrices */
2964 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2965 {
2966   Mat            B;
2967   Mat_MUMPS      *mumps;
2968   PetscBool      isSeqAIJ;
2969   PetscMPIInt    size;
2970 
2971   PetscFunctionBegin;
2972  #if defined(PETSC_USE_COMPLEX)
2973   PetscCheckFalse(A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
2974  #endif
2975   /* Create the factorization matrix */
2976   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ));
2977   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
2978   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2979   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
2980   PetscCall(MatSetUp(B));
2981 
2982   PetscCall(PetscNewLog(B,&mumps));
2983 
2984   B->ops->view    = MatView_MUMPS;
2985   B->ops->getinfo = MatGetInfo_MUMPS;
2986 
2987   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
2988   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
2989   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
2990   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
2991   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
2992   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
2993   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
2994   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
2995   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
2996   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
2997   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
2998   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
2999   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3000 
3001   if (ftype == MAT_FACTOR_LU) {
3002     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3003     B->factortype            = MAT_FACTOR_LU;
3004     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3005     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3006     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3007     mumps->sym = 0;
3008   } else {
3009     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3010     B->factortype                  = MAT_FACTOR_CHOLESKY;
3011     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3012     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3013     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3014 #if defined(PETSC_USE_COMPLEX)
3015     mumps->sym = 2;
3016 #else
3017     if (A->spd_set && A->spd) mumps->sym = 1;
3018     else                      mumps->sym = 2;
3019 #endif
3020   }
3021 
3022   /* set solvertype */
3023   PetscCall(PetscFree(B->solvertype));
3024   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3025   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3026   if (size == 1) {
3027     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3028     B->canuseordering = PETSC_TRUE;
3029   }
3030   B->ops->destroy = MatDestroy_MUMPS;
3031   B->data         = (void*)mumps;
3032 
3033   PetscCall(PetscInitializeMUMPS(A,mumps));
3034 
3035   *F = B;
3036   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3037   PetscFunctionReturn(0);
3038 }
3039 
3040 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3041 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3042 {
3043   Mat            B;
3044   Mat_MUMPS      *mumps;
3045   PetscBool      isSeqSBAIJ;
3046   PetscMPIInt    size;
3047 
3048   PetscFunctionBegin;
3049  #if defined(PETSC_USE_COMPLEX)
3050   PetscCheckFalse(A->hermitian && !A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3051  #endif
3052   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3053   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3054   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3055   PetscCall(MatSetUp(B));
3056 
3057   PetscCall(PetscNewLog(B,&mumps));
3058   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ));
3059   if (isSeqSBAIJ) {
3060     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3061   } else {
3062     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3063   }
3064 
3065   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3066   B->ops->view                   = MatView_MUMPS;
3067   B->ops->getinfo                = MatGetInfo_MUMPS;
3068 
3069   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3070   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3071   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3072   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3073   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3074   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3075   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3076   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3077   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3078   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3079   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3080   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3081   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3082 
3083   B->factortype = MAT_FACTOR_CHOLESKY;
3084 #if defined(PETSC_USE_COMPLEX)
3085   mumps->sym = 2;
3086 #else
3087   if (A->spd_set && A->spd) mumps->sym = 1;
3088   else                      mumps->sym = 2;
3089 #endif
3090 
3091   /* set solvertype */
3092   PetscCall(PetscFree(B->solvertype));
3093   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3094   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3095   if (size == 1) {
3096     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3097     B->canuseordering = PETSC_TRUE;
3098   }
3099   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3100   B->ops->destroy = MatDestroy_MUMPS;
3101   B->data         = (void*)mumps;
3102 
3103   PetscCall(PetscInitializeMUMPS(A,mumps));
3104 
3105   *F = B;
3106   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3107   PetscFunctionReturn(0);
3108 }
3109 
3110 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3111 {
3112   Mat            B;
3113   Mat_MUMPS      *mumps;
3114   PetscBool      isSeqBAIJ;
3115   PetscMPIInt    size;
3116 
3117   PetscFunctionBegin;
3118   /* Create the factorization matrix */
3119   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ));
3120   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3121   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3122   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3123   PetscCall(MatSetUp(B));
3124 
3125   PetscCall(PetscNewLog(B,&mumps));
3126   if (ftype == MAT_FACTOR_LU) {
3127     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3128     B->factortype            = MAT_FACTOR_LU;
3129     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3130     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3131     mumps->sym = 0;
3132     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3133   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3134 
3135   B->ops->view        = MatView_MUMPS;
3136   B->ops->getinfo     = MatGetInfo_MUMPS;
3137 
3138   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3139   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3140   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3141   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3142   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3143   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3144   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3145   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3146   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3147   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3148   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3149   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3150   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3151 
3152   /* set solvertype */
3153   PetscCall(PetscFree(B->solvertype));
3154   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3155   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3156   if (size == 1) {
3157     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3158     B->canuseordering = PETSC_TRUE;
3159   }
3160   B->ops->destroy = MatDestroy_MUMPS;
3161   B->data         = (void*)mumps;
3162 
3163   PetscCall(PetscInitializeMUMPS(A,mumps));
3164 
3165   *F = B;
3166   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3167   PetscFunctionReturn(0);
3168 }
3169 
3170 /* MatGetFactor for Seq and MPI SELL matrices */
3171 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3172 {
3173   Mat            B;
3174   Mat_MUMPS      *mumps;
3175   PetscBool      isSeqSELL;
3176   PetscMPIInt    size;
3177 
3178   PetscFunctionBegin;
3179   /* Create the factorization matrix */
3180   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL));
3181   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3182   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3183   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3184   PetscCall(MatSetUp(B));
3185 
3186   PetscCall(PetscNewLog(B,&mumps));
3187 
3188   B->ops->view        = MatView_MUMPS;
3189   B->ops->getinfo     = MatGetInfo_MUMPS;
3190 
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3202 
3203   if (ftype == MAT_FACTOR_LU) {
3204     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3205     B->factortype            = MAT_FACTOR_LU;
3206     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3207     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3208     mumps->sym = 0;
3209     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3210   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3211 
3212   /* set solvertype */
3213   PetscCall(PetscFree(B->solvertype));
3214   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3215   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3216   if (size == 1) {
3217     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3218     B->canuseordering = PETSC_TRUE;
3219   }
3220   B->ops->destroy = MatDestroy_MUMPS;
3221   B->data         = (void*)mumps;
3222 
3223   PetscCall(PetscInitializeMUMPS(A,mumps));
3224 
3225   *F = B;
3226   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3227   PetscFunctionReturn(0);
3228 }
3229 
3230 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3231 {
3232   PetscFunctionBegin;
3233   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3234   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3235   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3236   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3237   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3238   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3239   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3240   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3241   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3242   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3243   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps));
3244   PetscFunctionReturn(0);
3245 }
3246