xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 26cc229b24afffee8e9a203ddc8754b40d660fc7)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
54     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
58     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #endif
61 
62 #define MPIU_MUMPSINT             MPI_INT
63 #define PETSC_MUMPS_INT_MAX       2147483647
64 #define PETSC_MUMPS_INT_MIN       -2147483648
65 
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
67 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69   PetscFunctionBegin;
70 #if PetscDefined(USE_64BIT_INDICES)
71   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
72 #endif
73   *b = (PetscMUMPSInt)(a);
74   PetscFunctionReturn(0);
75 }
76 
77 /* Put these utility routines here since they are only used in this file */
78 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
79 {
80   PetscInt       myval;
81   PetscBool      myset;
82   PetscFunctionBegin;
83   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
84   PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
85   if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
86   if (set) *set = myset;
87   PetscFunctionReturn(0);
88 }
89 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
90 
91 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
92 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
93 #define PetscMUMPS_c(mumps) \
94   do { \
95     if (mumps->use_petsc_omp_support) { \
96       if (mumps->is_omp_master) { \
97         PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
98         MUMPS_c(&mumps->id); \
99         PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
100       } \
101       PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
102       /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
103          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
104          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
105          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
106       */ \
107       PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
108       PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
109       PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
110     } else { \
111       MUMPS_c(&mumps->id); \
112     } \
113   } while (0)
114 #else
115 #define PetscMUMPS_c(mumps) \
116   do { MUMPS_c(&mumps->id); } while (0)
117 #endif
118 
119 /* declare MumpsScalar */
120 #if defined(PETSC_USE_COMPLEX)
121 #if defined(PETSC_USE_REAL_SINGLE)
122 #define MumpsScalar mumps_complex
123 #else
124 #define MumpsScalar mumps_double_complex
125 #endif
126 #else
127 #define MumpsScalar PetscScalar
128 #endif
129 
130 /* macros s.t. indices match MUMPS documentation */
131 #define ICNTL(I) icntl[(I)-1]
132 #define CNTL(I) cntl[(I)-1]
133 #define INFOG(I) infog[(I)-1]
134 #define INFO(I) info[(I)-1]
135 #define RINFOG(I) rinfog[(I)-1]
136 #define RINFO(I) rinfo[(I)-1]
137 
138 typedef struct Mat_MUMPS Mat_MUMPS;
139 struct Mat_MUMPS {
140 #if defined(PETSC_USE_COMPLEX)
141 #if defined(PETSC_USE_REAL_SINGLE)
142   CMUMPS_STRUC_C id;
143 #else
144   ZMUMPS_STRUC_C id;
145 #endif
146 #else
147 #if defined(PETSC_USE_REAL_SINGLE)
148   SMUMPS_STRUC_C id;
149 #else
150   DMUMPS_STRUC_C id;
151 #endif
152 #endif
153 
154   MatStructure   matstruc;
155   PetscMPIInt    myid,petsc_size;
156   PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
157   PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
158   PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
159   PetscMUMPSInt  sym;
160   MPI_Comm       mumps_comm;
161   PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
162   VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
163   PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
164   PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
165 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
166   PetscInt       *rhs_nrow,max_nrhs;
167   PetscMPIInt    *rhs_recvcounts,*rhs_disps;
168   PetscScalar    *rhs_loc,*rhs_recvbuf;
169 #endif
170   Vec            b_seq,x_seq;
171   PetscInt       ninfo,*info;           /* which INFO to display */
172   PetscInt       sizeredrhs;
173   PetscScalar    *schur_sol;
174   PetscInt       schur_sizesol;
175   PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
176   PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
177   PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
178 
179   /* stuff used by petsc/mumps OpenMP support*/
180   PetscBool      use_petsc_omp_support;
181   PetscOmpCtrl   omp_ctrl;              /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
182   MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
183   PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
184   PetscMPIInt    tag,omp_comm_size;
185   PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
186   MPI_Request    *reqs;
187 };
188 
189 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
190    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
191  */
192 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
193 {
194   PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_USE_64BIT_INDICES)
198   {
199     PetscInt i;
200     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201       PetscCall(PetscFree(mumps->ia_alloc));
202       PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
203       mumps->cur_ilen = nrow+1;
204     }
205     if (nnz > mumps->cur_jlen) {
206       PetscCall(PetscFree(mumps->ja_alloc));
207       PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
208       mumps->cur_jlen = nnz;
209     }
210     for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
211     for (i=0; i<nnz; i++)    PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
212     *ia_mumps = mumps->ia_alloc;
213     *ja_mumps = mumps->ja_alloc;
214   }
215 #else
216   *ia_mumps = ia;
217   *ja_mumps = ja;
218 #endif
219   PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
220   PetscFunctionReturn(0);
221 }
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224 {
225   PetscFunctionBegin;
226   PetscCall(PetscFree(mumps->id.listvar_schur));
227   PetscCall(PetscFree(mumps->id.redrhs));
228   PetscCall(PetscFree(mumps->schur_sol));
229   mumps->id.size_schur = 0;
230   mumps->id.schur_lld  = 0;
231   mumps->id.ICNTL(19)  = 0;
232   PetscFunctionReturn(0);
233 }
234 
235 /* solve with rhs in mumps->id.redrhs and return in the same location */
236 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
237 {
238   Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
239   Mat                  S,B,X;
240   MatFactorSchurStatus schurstatus;
241   PetscInt             sizesol;
242 
243   PetscFunctionBegin;
244   PetscCall(MatFactorFactorizeSchurComplement(F));
245   PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
246   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
247   PetscCall(MatSetType(B,((PetscObject)S)->type_name));
248 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
249   PetscCall(MatBindToCPU(B,S->boundtocpu));
250 #endif
251   switch (schurstatus) {
252   case MAT_FACTOR_SCHUR_FACTORED:
253     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
254     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
255 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
256     PetscCall(MatBindToCPU(X,S->boundtocpu));
257 #endif
258     if (!mumps->id.ICNTL(9)) { /* transpose solve */
259       PetscCall(MatMatSolveTranspose(S,B,X));
260     } else {
261       PetscCall(MatMatSolve(S,B,X));
262     }
263     break;
264   case MAT_FACTOR_SCHUR_INVERTED:
265     sizesol = mumps->id.nrhs*mumps->id.size_schur;
266     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
267       PetscCall(PetscFree(mumps->schur_sol));
268       PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
269       mumps->schur_sizesol = sizesol;
270     }
271     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
272     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
274     PetscCall(MatBindToCPU(X,S->boundtocpu));
275 #endif
276     PetscCall(MatProductCreateWithMat(S,B,NULL,X));
277     if (!mumps->id.ICNTL(9)) { /* transpose solve */
278       PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
279     } else {
280       PetscCall(MatProductSetType(X,MATPRODUCT_AB));
281     }
282     PetscCall(MatProductSetFromOptions(X));
283     PetscCall(MatProductSymbolic(X));
284     PetscCall(MatProductNumeric(X));
285 
286     PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
287     break;
288   default:
289     SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
290   }
291   PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
292   PetscCall(MatDestroy(&B));
293   PetscCall(MatDestroy(&X));
294   PetscFunctionReturn(0);
295 }
296 
297 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
298 {
299   Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
300 
301   PetscFunctionBegin;
302   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
303     PetscFunctionReturn(0);
304   }
305   if (!expansion) { /* prepare for the condensation step */
306     PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
307     /* allocate MUMPS internal array to store reduced right-hand sides */
308     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
309       PetscCall(PetscFree(mumps->id.redrhs));
310       mumps->id.lredrhs = mumps->id.size_schur;
311       PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
312       mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
313     }
314     mumps->id.ICNTL(26) = 1; /* condensation phase */
315   } else { /* prepare for the expansion step */
316     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
317     PetscCall(MatMumpsSolveSchur_Private(F));
318     mumps->id.ICNTL(26) = 2; /* expansion phase */
319     PetscMUMPS_c(mumps);
320     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
321     /* restore defaults */
322     mumps->id.ICNTL(26) = -1;
323     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
324     if (mumps->id.nrhs > 1) {
325       PetscCall(PetscFree(mumps->id.redrhs));
326       mumps->id.lredrhs = 0;
327       mumps->sizeredrhs = 0;
328     }
329   }
330   PetscFunctionReturn(0);
331 }
332 
333 /*
334   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
335 
336   input:
337     A       - matrix in aij,baij or sbaij format
338     shift   - 0: C style output triple; 1: Fortran style output triple.
339     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
340               MAT_REUSE_MATRIX:   only the values in v array are updated
341   output:
342     nnz     - dim of r, c, and v (number of local nonzero entries of A)
343     r, c, v - row and col index, matrix values (matrix triples)
344 
345   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
346   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
347   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
348 
349  */
350 
351 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
352 {
353   const PetscScalar *av;
354   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
355   PetscInt64        nz,rnz,i,j,k;
356   PetscMUMPSInt     *row,*col;
357   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
358 
359   PetscFunctionBegin;
360   PetscCall(MatSeqAIJGetArrayRead(A,&av));
361   mumps->val = (PetscScalar*)av;
362   if (reuse == MAT_INITIAL_MATRIX) {
363     nz   = aa->nz;
364     ai   = aa->i;
365     aj   = aa->j;
366     PetscCall(PetscMalloc2(nz,&row,nz,&col));
367     for (i=k=0; i<M; i++) {
368       rnz = ai[i+1] - ai[i];
369       ajj = aj + ai[i];
370       for (j=0; j<rnz; j++) {
371         PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
372         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
373         k++;
374       }
375     }
376     mumps->irn = row;
377     mumps->jcn = col;
378     mumps->nnz = nz;
379   }
380   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
381   PetscFunctionReturn(0);
382 }
383 
384 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
385 {
386   PetscInt64     nz,i,j,k,r;
387   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
388   PetscMUMPSInt  *row,*col;
389 
390   PetscFunctionBegin;
391   mumps->val = a->val;
392   if (reuse == MAT_INITIAL_MATRIX) {
393     nz   = a->sliidx[a->totalslices];
394     PetscCall(PetscMalloc2(nz,&row,nz,&col));
395     for (i=k=0; i<a->totalslices; i++) {
396       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
397         PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
398       }
399     }
400     for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
401     mumps->irn = row;
402     mumps->jcn = col;
403     mumps->nnz = nz;
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
409 {
410   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
411   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
412   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
413   PetscInt       bs;
414   PetscMUMPSInt  *row,*col;
415 
416   PetscFunctionBegin;
417   PetscCall(MatGetBlockSize(A,&bs));
418   M          = A->rmap->N/bs;
419   mumps->val = aa->a;
420   if (reuse == MAT_INITIAL_MATRIX) {
421     ai   = aa->i; aj = aa->j;
422     nz   = bs2*aa->nz;
423     PetscCall(PetscMalloc2(nz,&row,nz,&col));
424     for (i=0; i<M; i++) {
425       ajj = aj + ai[i];
426       rnz = ai[i+1] - ai[i];
427       for (k=0; k<rnz; k++) {
428         for (j=0; j<bs; j++) {
429           for (m=0; m<bs; m++) {
430             PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
431             PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
432             idx++;
433           }
434         }
435       }
436     }
437     mumps->irn = row;
438     mumps->jcn = col;
439     mumps->nnz = nz;
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
445 {
446   const PetscInt *ai, *aj,*ajj;
447   PetscInt        bs;
448   PetscInt64      nz,rnz,i,j,k,m;
449   PetscMUMPSInt   *row,*col;
450   PetscScalar     *val;
451   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
452   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
453 #if defined(PETSC_USE_COMPLEX)
454   PetscBool       hermitian;
455 #endif
456 
457   PetscFunctionBegin;
458 #if defined(PETSC_USE_COMPLEX)
459   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
460   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
461 #endif
462   ai   = aa->i;
463   aj   = aa->j;
464   PetscCall(MatGetBlockSize(A,&bs));
465   if (reuse == MAT_INITIAL_MATRIX) {
466     nz   = aa->nz;
467     PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
468     if (bs>1) {
469       PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
470       mumps->val = mumps->val_alloc;
471     } else {
472       mumps->val = aa->a;
473     }
474     mumps->irn = row;
475     mumps->jcn = col;
476   } else {
477     if (bs == 1) mumps->val = aa->a;
478     row = mumps->irn;
479     col = mumps->jcn;
480   }
481   val = mumps->val;
482 
483   nz = 0;
484   if (bs>1) {
485     for (i=0; i<mbs; i++) {
486       rnz = ai[i+1] - ai[i];
487       ajj = aj + ai[i];
488       for (j=0; j<rnz; j++) {
489         for (k=0; k<bs; k++) {
490           for (m=0; m<bs; m++) {
491             if (ajj[j]>i || k>=m) {
492               if (reuse == MAT_INITIAL_MATRIX) {
493                 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
494                 PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
495               }
496               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
497             }
498           }
499         }
500       }
501     }
502   } else if (reuse == MAT_INITIAL_MATRIX) {
503     for (i=0; i<mbs; i++) {
504       rnz = ai[i+1] - ai[i];
505       ajj = aj + ai[i];
506       for (j=0; j<rnz; j++) {
507         PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
508         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
509         nz++;
510       }
511     }
512     PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
513   }
514   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
515   PetscFunctionReturn(0);
516 }
517 
518 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
519 {
520   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
521   PetscInt64        nz,rnz,i,j;
522   const PetscScalar *av,*v1;
523   PetscScalar       *val;
524   PetscMUMPSInt     *row,*col;
525   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
526   PetscBool         missing;
527 #if defined(PETSC_USE_COMPLEX)
528   PetscBool         hermitian;
529 #endif
530 
531   PetscFunctionBegin;
532 #if defined(PETSC_USE_COMPLEX)
533   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
534   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
535 #endif
536   PetscCall(MatSeqAIJGetArrayRead(A,&av));
537   ai    = aa->i; aj = aa->j;
538   adiag = aa->diag;
539   PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
540   if (reuse == MAT_INITIAL_MATRIX) {
541     /* count nz in the upper triangular part of A */
542     nz = 0;
543     if (missing) {
544       for (i=0; i<M; i++) {
545         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
546           for (j=ai[i];j<ai[i+1];j++) {
547             if (aj[j] < i) continue;
548             nz++;
549           }
550         } else {
551           nz += ai[i+1] - adiag[i];
552         }
553       }
554     } else {
555       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
556     }
557     PetscCall(PetscMalloc2(nz,&row,nz,&col));
558     PetscCall(PetscMalloc1(nz,&val));
559     mumps->nnz = nz;
560     mumps->irn = row;
561     mumps->jcn = col;
562     mumps->val = mumps->val_alloc = val;
563 
564     nz = 0;
565     if (missing) {
566       for (i=0; i<M; i++) {
567         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
568           for (j=ai[i];j<ai[i+1];j++) {
569             if (aj[j] < i) continue;
570             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
571             PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
572             val[nz] = av[j];
573             nz++;
574           }
575         } else {
576           rnz = ai[i+1] - adiag[i];
577           ajj = aj + adiag[i];
578           v1  = av + adiag[i];
579           for (j=0; j<rnz; j++) {
580             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
581             PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
582             val[nz++] = v1[j];
583           }
584         }
585       }
586     } else {
587       for (i=0; i<M; i++) {
588         rnz = ai[i+1] - adiag[i];
589         ajj = aj + adiag[i];
590         v1  = av + adiag[i];
591         for (j=0; j<rnz; j++) {
592           PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
593           PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
594           val[nz++] = v1[j];
595         }
596       }
597     }
598   } else {
599     nz = 0;
600     val = mumps->val;
601     if (missing) {
602       for (i=0; i <M; i++) {
603         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
604           for (j=ai[i];j<ai[i+1];j++) {
605             if (aj[j] < i) continue;
606             val[nz++] = av[j];
607           }
608         } else {
609           rnz = ai[i+1] - adiag[i];
610           v1  = av + adiag[i];
611           for (j=0; j<rnz; j++) {
612             val[nz++] = v1[j];
613           }
614         }
615       }
616     } else {
617       for (i=0; i <M; i++) {
618         rnz = ai[i+1] - adiag[i];
619         v1  = av + adiag[i];
620         for (j=0; j<rnz; j++) {
621           val[nz++] = v1[j];
622         }
623       }
624     }
625   }
626   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
627   PetscFunctionReturn(0);
628 }
629 
630 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
631 {
632   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
633   PetscInt          bs;
634   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
635   PetscMUMPSInt     *row,*col;
636   const PetscScalar *av,*bv,*v1,*v2;
637   PetscScalar       *val;
638   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
639   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
640   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
641   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
642 #if defined(PETSC_USE_COMPLEX)
643   PetscBool         hermitian;
644 #endif
645 
646   PetscFunctionBegin;
647 #if defined(PETSC_USE_COMPLEX)
648   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
649   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
650 #endif
651   PetscCall(MatGetBlockSize(A,&bs));
652   rstart = A->rmap->rstart;
653   ai = aa->i;
654   aj = aa->j;
655   bi = bb->i;
656   bj = bb->j;
657   av = aa->a;
658   bv = bb->a;
659 
660   garray = mat->garray;
661 
662   if (reuse == MAT_INITIAL_MATRIX) {
663     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
664     PetscCall(PetscMalloc2(nz,&row,nz,&col));
665     PetscCall(PetscMalloc1(nz,&val));
666     /* can not decide the exact mumps->nnz now because of the SBAIJ */
667     mumps->irn = row;
668     mumps->jcn = col;
669     mumps->val = mumps->val_alloc = val;
670   } else {
671     val = mumps->val;
672   }
673 
674   jj = 0; irow = rstart;
675   for (i=0; i<mbs; i++) {
676     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
677     countA = ai[i+1] - ai[i];
678     countB = bi[i+1] - bi[i];
679     bjj    = bj + bi[i];
680     v1     = av + ai[i]*bs2;
681     v2     = bv + bi[i]*bs2;
682 
683     if (bs>1) {
684       /* A-part */
685       for (j=0; j<countA; j++) {
686         for (k=0; k<bs; k++) {
687           for (m=0; m<bs; m++) {
688             if (rstart + ajj[j]*bs>irow || k>=m) {
689               if (reuse == MAT_INITIAL_MATRIX) {
690                 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
691                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
692               }
693               val[jj++] = v1[j*bs2 + m + k*bs];
694             }
695           }
696         }
697       }
698 
699       /* B-part */
700       for (j=0; j < countB; j++) {
701         for (k=0; k<bs; k++) {
702           for (m=0; m<bs; m++) {
703             if (reuse == MAT_INITIAL_MATRIX) {
704               PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
705               PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
706             }
707             val[jj++] = v2[j*bs2 + m + k*bs];
708           }
709         }
710       }
711     } else {
712       /* A-part */
713       for (j=0; j<countA; j++) {
714         if (reuse == MAT_INITIAL_MATRIX) {
715           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
716           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
717         }
718         val[jj++] = v1[j];
719       }
720 
721       /* B-part */
722       for (j=0; j < countB; j++) {
723         if (reuse == MAT_INITIAL_MATRIX) {
724           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
725           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
726         }
727         val[jj++] = v2[j];
728       }
729     }
730     irow+=bs;
731   }
732   mumps->nnz = jj;
733   PetscFunctionReturn(0);
734 }
735 
736 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
737 {
738   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
739   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
740   PetscMUMPSInt     *row,*col;
741   const PetscScalar *av, *bv,*v1,*v2;
742   PetscScalar       *val;
743   Mat               Ad,Ao;
744   Mat_SeqAIJ        *aa;
745   Mat_SeqAIJ        *bb;
746 
747   PetscFunctionBegin;
748   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
749   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
750   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
751 
752   aa = (Mat_SeqAIJ*)(Ad)->data;
753   bb = (Mat_SeqAIJ*)(Ao)->data;
754   ai = aa->i;
755   aj = aa->j;
756   bi = bb->i;
757   bj = bb->j;
758 
759   rstart = A->rmap->rstart;
760 
761   if (reuse == MAT_INITIAL_MATRIX) {
762     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
763     PetscCall(PetscMalloc2(nz,&row,nz,&col));
764     PetscCall(PetscMalloc1(nz,&val));
765     mumps->nnz = nz;
766     mumps->irn = row;
767     mumps->jcn = col;
768     mumps->val = mumps->val_alloc = val;
769   } else {
770     val = mumps->val;
771   }
772 
773   jj = 0; irow = rstart;
774   for (i=0; i<m; i++) {
775     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
776     countA = ai[i+1] - ai[i];
777     countB = bi[i+1] - bi[i];
778     bjj    = bj + bi[i];
779     v1     = av + ai[i];
780     v2     = bv + bi[i];
781 
782     /* A-part */
783     for (j=0; j<countA; j++) {
784       if (reuse == MAT_INITIAL_MATRIX) {
785         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
786         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
787       }
788       val[jj++] = v1[j];
789     }
790 
791     /* B-part */
792     for (j=0; j < countB; j++) {
793       if (reuse == MAT_INITIAL_MATRIX) {
794         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
795         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
796       }
797       val[jj++] = v2[j];
798     }
799     irow++;
800   }
801   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
802   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
803   PetscFunctionReturn(0);
804 }
805 
806 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
807 {
808   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
809   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
810   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
811   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
812   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
813   const PetscInt    bs2=mat->bs2;
814   PetscInt          bs;
815   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
816   PetscMUMPSInt     *row,*col;
817   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
818   PetscScalar       *val;
819 
820   PetscFunctionBegin;
821   PetscCall(MatGetBlockSize(A,&bs));
822   if (reuse == MAT_INITIAL_MATRIX) {
823     nz   = bs2*(aa->nz + bb->nz);
824     PetscCall(PetscMalloc2(nz,&row,nz,&col));
825     PetscCall(PetscMalloc1(nz,&val));
826     mumps->nnz = nz;
827     mumps->irn = row;
828     mumps->jcn = col;
829     mumps->val = mumps->val_alloc = val;
830   } else {
831     val = mumps->val;
832   }
833 
834   jj = 0; irow = rstart;
835   for (i=0; i<mbs; i++) {
836     countA = ai[i+1] - ai[i];
837     countB = bi[i+1] - bi[i];
838     ajj    = aj + ai[i];
839     bjj    = bj + bi[i];
840     v1     = av + bs2*ai[i];
841     v2     = bv + bs2*bi[i];
842 
843     idx = 0;
844     /* A-part */
845     for (k=0; k<countA; k++) {
846       for (j=0; j<bs; j++) {
847         for (n=0; n<bs; n++) {
848           if (reuse == MAT_INITIAL_MATRIX) {
849             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
850             PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
851           }
852           val[jj++] = v1[idx++];
853         }
854       }
855     }
856 
857     idx = 0;
858     /* B-part */
859     for (k=0; k<countB; k++) {
860       for (j=0; j<bs; j++) {
861         for (n=0; n<bs; n++) {
862           if (reuse == MAT_INITIAL_MATRIX) {
863             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
864             PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
865           }
866           val[jj++] = v2[idx++];
867         }
868       }
869     }
870     irow += bs;
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
876 {
877   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
878   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
879   PetscMUMPSInt     *row,*col;
880   const PetscScalar *av, *bv,*v1,*v2;
881   PetscScalar       *val;
882   Mat               Ad,Ao;
883   Mat_SeqAIJ        *aa;
884   Mat_SeqAIJ        *bb;
885 #if defined(PETSC_USE_COMPLEX)
886   PetscBool         hermitian;
887 #endif
888 
889   PetscFunctionBegin;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
892   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
893 #endif
894   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
895   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
896   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
897 
898   aa    = (Mat_SeqAIJ*)(Ad)->data;
899   bb    = (Mat_SeqAIJ*)(Ao)->data;
900   ai    = aa->i;
901   aj    = aa->j;
902   adiag = aa->diag;
903   bi    = bb->i;
904   bj    = bb->j;
905 
906   rstart = A->rmap->rstart;
907 
908   if (reuse == MAT_INITIAL_MATRIX) {
909     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
910     nzb = 0;    /* num of upper triangular entries in mat->B */
911     for (i=0; i<m; i++) {
912       nza   += (ai[i+1] - adiag[i]);
913       countB = bi[i+1] - bi[i];
914       bjj    = bj + bi[i];
915       for (j=0; j<countB; j++) {
916         if (garray[bjj[j]] > rstart) nzb++;
917       }
918     }
919 
920     nz   = nza + nzb; /* total nz of upper triangular part of mat */
921     PetscCall(PetscMalloc2(nz,&row,nz,&col));
922     PetscCall(PetscMalloc1(nz,&val));
923     mumps->nnz = nz;
924     mumps->irn = row;
925     mumps->jcn = col;
926     mumps->val = mumps->val_alloc = val;
927   } else {
928     val = mumps->val;
929   }
930 
931   jj = 0; irow = rstart;
932   for (i=0; i<m; i++) {
933     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
934     v1     = av + adiag[i];
935     countA = ai[i+1] - adiag[i];
936     countB = bi[i+1] - bi[i];
937     bjj    = bj + bi[i];
938     v2     = bv + bi[i];
939 
940     /* A-part */
941     for (j=0; j<countA; j++) {
942       if (reuse == MAT_INITIAL_MATRIX) {
943         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
944         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
945       }
946       val[jj++] = v1[j];
947     }
948 
949     /* B-part */
950     for (j=0; j < countB; j++) {
951       if (garray[bjj[j]] > rstart) {
952         if (reuse == MAT_INITIAL_MATRIX) {
953           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
954           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
955         }
956         val[jj++] = v2[j];
957       }
958     }
959     irow++;
960   }
961   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
962   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatDestroy_MUMPS(Mat A)
967 {
968   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
969 
970   PetscFunctionBegin;
971   PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
972   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
973   PetscCall(VecScatterDestroy(&mumps->scat_sol));
974   PetscCall(VecDestroy(&mumps->b_seq));
975   PetscCall(VecDestroy(&mumps->x_seq));
976   PetscCall(PetscFree(mumps->id.perm_in));
977   PetscCall(PetscFree2(mumps->irn,mumps->jcn));
978   PetscCall(PetscFree(mumps->val_alloc));
979   PetscCall(PetscFree(mumps->info));
980   PetscCall(MatMumpsResetSchur_Private(mumps));
981   mumps->id.job = JOB_END;
982   PetscMUMPS_c(mumps);
983   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
984 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
985   if (mumps->use_petsc_omp_support) {
986     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
987     PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
988     PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
989   }
990 #endif
991   PetscCall(PetscFree(mumps->ia_alloc));
992   PetscCall(PetscFree(mumps->ja_alloc));
993   PetscCall(PetscFree(mumps->recvcount));
994   PetscCall(PetscFree(mumps->reqs));
995   PetscCall(PetscFree(mumps->irhs_loc));
996   if (mumps->mumps_comm != MPI_COMM_NULL) PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
997   PetscCall(PetscFree(A->data));
998 
999   /* clear composed functions */
1000   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1001   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
1002   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
1003   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
1004   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
1005   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
1006   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
1007   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
1008   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
1009   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
1010   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
1011   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
1012   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1017 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1018 {
1019   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1020   const PetscMPIInt  ompsize=mumps->omp_comm_size;
1021   PetscInt           i,m,M,rstart;
1022 
1023   PetscFunctionBegin;
1024   PetscCall(MatGetSize(A,&M,NULL));
1025   PetscCall(MatGetLocalSize(A,&m,NULL));
1026   PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1027   if (ompsize == 1) {
1028     if (!mumps->irhs_loc) {
1029       mumps->nloc_rhs = m;
1030       PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
1031       PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1032       for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1033     }
1034     mumps->id.rhs_loc = (MumpsScalar*)array;
1035   } else {
1036   #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1037     const PetscInt  *ranges;
1038     PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
1039     MPI_Group       petsc_group,omp_group;
1040     PetscScalar     *recvbuf=NULL;
1041 
1042     if (mumps->is_omp_master) {
1043       /* Lazily initialize the omp stuff for distributed rhs */
1044       if (!mumps->irhs_loc) {
1045         PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
1046         PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
1047         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
1048         PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
1049         for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1050         PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));
1051 
1052         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1053         mumps->nloc_rhs = 0;
1054         PetscCall(MatGetOwnershipRanges(A,&ranges));
1055         for (j=0; j<ompsize; j++) {
1056           mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1057           mumps->nloc_rhs   += mumps->rhs_nrow[j];
1058         }
1059         PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
1060         for (j=k=0; j<ompsize; j++) {
1061           for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1062         }
1063 
1064         PetscCall(PetscFree2(omp_ranks,petsc_ranks));
1065         PetscCallMPI(MPI_Group_free(&petsc_group));
1066         PetscCallMPI(MPI_Group_free(&omp_group));
1067       }
1068 
1069       /* Realloc buffers when current nrhs is bigger than what we have met */
1070       if (nrhs > mumps->max_nrhs) {
1071         PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
1072         PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
1073         mumps->max_nrhs = nrhs;
1074       }
1075 
1076       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1077       for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
1078       mumps->rhs_disps[0] = 0;
1079       for (j=1; j<ompsize; j++) {
1080         mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1081         PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1082       }
1083       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1084     }
1085 
1086     PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
1087     PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));
1088 
1089     if (mumps->is_omp_master) {
1090       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1091         PetscScalar *dst,*dstbase = mumps->rhs_loc;
1092         for (j=0; j<ompsize; j++) {
1093           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1094           dst = dstbase;
1095           for (i=0; i<nrhs; i++) {
1096             PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
1097             src += mumps->rhs_nrow[j];
1098             dst += mumps->nloc_rhs;
1099           }
1100           dstbase += mumps->rhs_nrow[j];
1101         }
1102       }
1103       mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1104     }
1105   #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1106   }
1107   mumps->id.nrhs     = nrhs;
1108   mumps->id.nloc_rhs = mumps->nloc_rhs;
1109   mumps->id.lrhs_loc = mumps->nloc_rhs;
1110   mumps->id.irhs_loc = mumps->irhs_loc;
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1115 {
1116   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1117   const PetscScalar  *rarray = NULL;
1118   PetscScalar        *array;
1119   IS                 is_iden,is_petsc;
1120   PetscInt           i;
1121   PetscBool          second_solve = PETSC_FALSE;
1122   static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1123 
1124   PetscFunctionBegin;
1125   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1));
1126   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2));
1127 
1128   if (A->factorerrortype) {
1129     PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1130     PetscCall(VecSetInf(x));
1131     PetscFunctionReturn(0);
1132   }
1133 
1134   mumps->id.nrhs = 1;
1135   if (mumps->petsc_size > 1) {
1136     if (mumps->ICNTL20 == 10) {
1137       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1138       PetscCall(VecGetArrayRead(b,&rarray));
1139       PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
1140     } else {
1141       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1142       PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1143       PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1144       if (!mumps->myid) {
1145         PetscCall(VecGetArray(mumps->b_seq,&array));
1146         mumps->id.rhs = (MumpsScalar*)array;
1147       }
1148     }
1149   } else {  /* petsc_size == 1 */
1150     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1151     PetscCall(VecCopy(b,x));
1152     PetscCall(VecGetArray(x,&array));
1153     mumps->id.rhs = (MumpsScalar*)array;
1154   }
1155 
1156   /*
1157      handle condensation step of Schur complement (if any)
1158      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1159      According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
1160      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1161      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1162   */
1163   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1164     PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1165     second_solve = PETSC_TRUE;
1166     PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1167   }
1168   /* solve phase */
1169   /*-------------*/
1170   mumps->id.job = JOB_SOLVE;
1171   PetscMUMPS_c(mumps);
1172   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1173 
1174   /* handle expansion step of Schur complement (if any) */
1175   if (second_solve) {
1176     PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1177   }
1178 
1179   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1180     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1181       /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
1182       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1183     }
1184     if (!mumps->scat_sol) { /* create scatter scat_sol */
1185       PetscInt *isol2_loc=NULL;
1186       PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
1187       PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
1188       for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1189       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc));  /* to */
1190       PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
1191       PetscCall(ISDestroy(&is_iden));
1192       PetscCall(ISDestroy(&is_petsc));
1193       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1194     }
1195 
1196     PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1197     PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1198   }
1199 
1200   if (mumps->petsc_size > 1) {
1201     if (mumps->ICNTL20 == 10) {
1202       PetscCall(VecRestoreArrayRead(b,&rarray));
1203     } else if (!mumps->myid) {
1204       PetscCall(VecRestoreArray(mumps->b_seq,&array));
1205     }
1206   } else PetscCall(VecRestoreArray(x,&array));
1207 
1208   PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1213 {
1214   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1215 
1216   PetscFunctionBegin;
1217   mumps->id.ICNTL(9) = 0;
1218   PetscCall(MatSolve_MUMPS(A,b,x));
1219   mumps->id.ICNTL(9) = 1;
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1224 {
1225   Mat               Bt = NULL;
1226   PetscBool         denseX,denseB,flg,flgT;
1227   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1228   PetscInt          i,nrhs,M;
1229   PetscScalar       *array;
1230   const PetscScalar *rbray;
1231   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1232   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1233   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1234   IS                is_to,is_from;
1235   PetscInt          k,proc,j,m,myrstart;
1236   const PetscInt    *rstart;
1237   Vec               v_mpi,msol_loc;
1238   VecScatter        scat_sol;
1239   Vec               b_seq;
1240   VecScatter        scat_rhs;
1241   PetscScalar       *aa;
1242   PetscInt          spnr,*ia,*ja;
1243   Mat_MPIAIJ        *b = NULL;
1244 
1245   PetscFunctionBegin;
1246   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
1247   PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1248 
1249   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
1250   if (denseB) {
1251     PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1252     mumps->id.ICNTL(20)= 0; /* dense RHS */
1253   } else { /* sparse B */
1254     PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1255     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
1256     if (flgT) { /* input B is transpose of actural RHS matrix,
1257                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1258       PetscCall(MatTransposeGetMat(B,&Bt));
1259     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1260     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1261   }
1262 
1263   PetscCall(MatGetSize(B,&M,&nrhs));
1264   mumps->id.nrhs = nrhs;
1265   mumps->id.lrhs = M;
1266   mumps->id.rhs  = NULL;
1267 
1268   if (mumps->petsc_size == 1) {
1269     PetscScalar *aa;
1270     PetscInt    spnr,*ia,*ja;
1271     PetscBool   second_solve = PETSC_FALSE;
1272 
1273     PetscCall(MatDenseGetArray(X,&array));
1274     mumps->id.rhs = (MumpsScalar*)array;
1275 
1276     if (denseB) {
1277       /* copy B to X */
1278       PetscCall(MatDenseGetArrayRead(B,&rbray));
1279       PetscCall(PetscArraycpy(array,rbray,M*nrhs));
1280       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1281     } else { /* sparse B */
1282       PetscCall(MatSeqAIJGetArray(Bt,&aa));
1283       PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1284       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1285       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1286       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1287     }
1288     /* handle condensation step of Schur complement (if any) */
1289     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1290       second_solve = PETSC_TRUE;
1291       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1292     }
1293     /* solve phase */
1294     /*-------------*/
1295     mumps->id.job = JOB_SOLVE;
1296     PetscMUMPS_c(mumps);
1297     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1298 
1299     /* handle expansion step of Schur complement (if any) */
1300     if (second_solve) {
1301       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1302     }
1303     if (!denseB) { /* sparse B */
1304       PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
1305       PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1306       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1307     }
1308     PetscCall(MatDenseRestoreArray(X,&array));
1309     PetscFunctionReturn(0);
1310   }
1311 
1312   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1313   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1314 
1315   /* create msol_loc to hold mumps local solution */
1316   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1317   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1318 
1319   lsol_loc  = mumps->id.lsol_loc;
1320   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1321   PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
1322   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1323   mumps->id.isol_loc = isol_loc;
1324 
1325   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));
1326 
1327   if (denseB) {
1328     if (mumps->ICNTL20 == 10) {
1329       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1330       PetscCall(MatDenseGetArrayRead(B,&rbray));
1331       PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
1332       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1333       PetscCall(MatGetLocalSize(B,&m,NULL));
1334       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
1335     } else {
1336       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1337       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
1338         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1339         0, re-arrange B into desired order, which is a local operation.
1340       */
1341 
1342       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1343       /* wrap dense rhs matrix B into a vector v_mpi */
1344       PetscCall(MatGetLocalSize(B,&m,NULL));
1345       PetscCall(MatDenseGetArray(B,&bray));
1346       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1347       PetscCall(MatDenseRestoreArray(B,&bray));
1348 
1349       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1350       if (!mumps->myid) {
1351         PetscInt *idx;
1352         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1353         PetscCall(PetscMalloc1(nrhs*M,&idx));
1354         PetscCall(MatGetOwnershipRanges(B,&rstart));
1355         k = 0;
1356         for (proc=0; proc<mumps->petsc_size; proc++) {
1357           for (j=0; j<nrhs; j++) {
1358             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1359           }
1360         }
1361 
1362         PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
1363         PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
1364         PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
1365       } else {
1366         PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
1367         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
1368         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
1369       }
1370       PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
1371       PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1372       PetscCall(ISDestroy(&is_to));
1373       PetscCall(ISDestroy(&is_from));
1374       PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1375 
1376       if (!mumps->myid) { /* define rhs on the host */
1377         PetscCall(VecGetArray(b_seq,&bray));
1378         mumps->id.rhs = (MumpsScalar*)bray;
1379         PetscCall(VecRestoreArray(b_seq,&bray));
1380       }
1381     }
1382   } else { /* sparse B */
1383     b = (Mat_MPIAIJ*)Bt->data;
1384 
1385     /* wrap dense X into a vector v_mpi */
1386     PetscCall(MatGetLocalSize(X,&m,NULL));
1387     PetscCall(MatDenseGetArray(X,&bray));
1388     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1389     PetscCall(MatDenseRestoreArray(X,&bray));
1390 
1391     if (!mumps->myid) {
1392       PetscCall(MatSeqAIJGetArray(b->A,&aa));
1393       PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1394       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1395       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1396       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1397     } else {
1398       mumps->id.irhs_ptr    = NULL;
1399       mumps->id.irhs_sparse = NULL;
1400       mumps->id.nz_rhs      = 0;
1401       mumps->id.rhs_sparse  = NULL;
1402     }
1403   }
1404 
1405   /* solve phase */
1406   /*-------------*/
1407   mumps->id.job = JOB_SOLVE;
1408   PetscMUMPS_c(mumps);
1409   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1410 
1411   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1412   PetscCall(MatDenseGetArray(X,&array));
1413   PetscCall(VecPlaceArray(v_mpi,array));
1414 
1415   /* create scatter scat_sol */
1416   PetscCall(MatGetOwnershipRanges(X,&rstart));
1417   /* iidx: index for scatter mumps solution to petsc X */
1418 
1419   PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
1420   PetscCall(PetscMalloc1(nlsol_loc,&idxx));
1421   for (i=0; i<lsol_loc; i++) {
1422     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1423 
1424     for (proc=0; proc<mumps->petsc_size; proc++) {
1425       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1426         myrstart = rstart[proc];
1427         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1428         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1429         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1430         break;
1431       }
1432     }
1433 
1434     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1435   }
1436   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
1437   PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
1438   PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1439   PetscCall(ISDestroy(&is_from));
1440   PetscCall(ISDestroy(&is_to));
1441   PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1442   PetscCall(MatDenseRestoreArray(X,&array));
1443 
1444   /* free spaces */
1445   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1446   mumps->id.isol_loc = isol_loc_save;
1447 
1448   PetscCall(PetscFree2(sol_loc,isol_loc));
1449   PetscCall(PetscFree(idxx));
1450   PetscCall(VecDestroy(&msol_loc));
1451   PetscCall(VecDestroy(&v_mpi));
1452   if (!denseB) {
1453     if (!mumps->myid) {
1454       b = (Mat_MPIAIJ*)Bt->data;
1455       PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
1456       PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1457       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1458     }
1459   } else {
1460     if (mumps->ICNTL20 == 0) {
1461       PetscCall(VecDestroy(&b_seq));
1462       PetscCall(VecScatterDestroy(&scat_rhs));
1463     }
1464   }
1465   PetscCall(VecScatterDestroy(&scat_sol));
1466   PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1471 {
1472   PetscBool      flg;
1473   Mat            B;
1474 
1475   PetscFunctionBegin;
1476   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
1477   PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1478 
1479   /* Create B=Bt^T that uses Bt's data structure */
1480   PetscCall(MatCreateTranspose(Bt,&B));
1481 
1482   PetscCall(MatMatSolve_MUMPS(A,B,X));
1483   PetscCall(MatDestroy(&B));
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #if !defined(PETSC_USE_COMPLEX)
1488 /*
1489   input:
1490    F:        numeric factor
1491   output:
1492    nneg:     total number of negative pivots
1493    nzero:    total number of zero pivots
1494    npos:     (global dimension of F) - nneg - nzero
1495 */
1496 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1497 {
1498   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1499   PetscMPIInt    size;
1500 
1501   PetscFunctionBegin;
1502   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
1503   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1504   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13));
1505 
1506   if (nneg) *nneg = mumps->id.INFOG(12);
1507   if (nzero || npos) {
1508     PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1509     if (nzero) *nzero = mumps->id.INFOG(28);
1510     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1511   }
1512   PetscFunctionReturn(0);
1513 }
1514 #endif
1515 
1516 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1517 {
1518   PetscInt       i,nreqs;
1519   PetscMUMPSInt  *irn,*jcn;
1520   PetscMPIInt    count;
1521   PetscInt64     totnnz,remain;
1522   const PetscInt osize=mumps->omp_comm_size;
1523   PetscScalar    *val;
1524 
1525   PetscFunctionBegin;
1526   if (osize > 1) {
1527     if (reuse == MAT_INITIAL_MATRIX) {
1528       /* master first gathers counts of nonzeros to receive */
1529       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
1530       PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));
1531 
1532       /* Then each computes number of send/recvs */
1533       if (mumps->is_omp_master) {
1534         /* Start from 1 since self communication is not done in MPI */
1535         nreqs = 0;
1536         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1537       } else {
1538         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1539       }
1540       PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val seperately */
1541 
1542       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1543          MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
1544          might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
1545          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1546        */
1547       nreqs = 0; /* counter for actual send/recvs */
1548       if (mumps->is_omp_master) {
1549         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1550         PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn));
1551         PetscCall(PetscMalloc1(totnnz,&val));
1552 
1553         /* Self communication */
1554         PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz));
1555         PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz));
1556         PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz));
1557 
1558         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1559         PetscCall(PetscFree2(mumps->irn,mumps->jcn));
1560         PetscCall(PetscFree(mumps->val_alloc));
1561         mumps->nnz = totnnz;
1562         mumps->irn = irn;
1563         mumps->jcn = jcn;
1564         mumps->val = mumps->val_alloc = val;
1565 
1566         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1567         jcn += mumps->recvcount[0];
1568         val += mumps->recvcount[0];
1569 
1570         /* Remote communication */
1571         for (i=1; i<osize; i++) {
1572           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1573           remain = mumps->recvcount[i] - count;
1574           while (count>0) {
1575             PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1576             PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1577             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1578             irn    += count;
1579             jcn    += count;
1580             val    += count;
1581             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1582             remain -= count;
1583           }
1584         }
1585       } else {
1586         irn    = mumps->irn;
1587         jcn    = mumps->jcn;
1588         val    = mumps->val;
1589         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1590         remain = mumps->nnz - count;
1591         while (count>0) {
1592           PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1593           PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1594           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1595           irn    += count;
1596           jcn    += count;
1597           val    += count;
1598           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1599           remain -= count;
1600         }
1601       }
1602     } else {
1603       nreqs = 0;
1604       if (mumps->is_omp_master) {
1605         val = mumps->val + mumps->recvcount[0];
1606         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1607           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1608           remain = mumps->recvcount[i] - count;
1609           while (count>0) {
1610             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1611             val    += count;
1612             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1613             remain -= count;
1614           }
1615         }
1616       } else {
1617         val    = mumps->val;
1618         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1619         remain = mumps->nnz - count;
1620         while (count>0) {
1621           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1622           val    += count;
1623           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1624           remain -= count;
1625         }
1626       }
1627     }
1628     PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE));
1629     mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1635 {
1636   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1637   PetscBool      isMPIAIJ;
1638 
1639   PetscFunctionBegin;
1640   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1641     if (mumps->id.INFOG(1) == -6) {
1642       PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1643     }
1644     PetscCall(PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1645     PetscFunctionReturn(0);
1646   }
1647 
1648   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1649   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1650 
1651   /* numerical factorization phase */
1652   /*-------------------------------*/
1653   mumps->id.job = JOB_FACTNUMERIC;
1654   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1655     if (!mumps->myid) {
1656       mumps->id.a = (MumpsScalar*)mumps->val;
1657     }
1658   } else {
1659     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1660   }
1661   PetscMUMPS_c(mumps);
1662   if (mumps->id.INFOG(1) < 0) {
1663     if (A->erroriffailure) {
1664       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1665     } else {
1666       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1667         PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1668         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1669       } else if (mumps->id.INFOG(1) == -13) {
1670         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1671         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1672       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1673         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1674         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1675       } else {
1676         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1677         F->factorerrortype = MAT_FACTOR_OTHER;
1678       }
1679     }
1680   }
1681   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16));
1682 
1683   F->assembled    = PETSC_TRUE;
1684 
1685   if (F->schur) { /* reset Schur status to unfactored */
1686 #if defined(PETSC_HAVE_CUDA)
1687     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1688 #endif
1689     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1690       mumps->id.ICNTL(19) = 2;
1691       PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1692     }
1693     PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1694   }
1695 
1696   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1697   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1698 
1699   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1700   if (mumps->petsc_size > 1) {
1701     PetscInt    lsol_loc;
1702     PetscScalar *sol_loc;
1703 
1704     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1705 
1706     /* distributed solution; Create x_seq=sol_loc for repeated use */
1707     if (mumps->x_seq) {
1708       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1709       PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
1710       PetscCall(VecDestroy(&mumps->x_seq));
1711     }
1712     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1713     PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc));
1714     mumps->id.lsol_loc = lsol_loc;
1715     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1716     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq));
1717   }
1718   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 /* Sets MUMPS options from the options database */
1723 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1724 {
1725   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1726   PetscMUMPSInt  icntl=0;
1727   PetscInt       info[80],i,ninfo=80,rbs,cbs;
1728   PetscBool      flg=PETSC_FALSE;
1729 
1730   PetscFunctionBegin;
1731   PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"MUMPS Options","Mat");
1732   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg));
1733   if (flg) mumps->id.ICNTL(1) = icntl;
1734   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg));
1735   if (flg) mumps->id.ICNTL(2) = icntl;
1736   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg));
1737   if (flg) mumps->id.ICNTL(3) = icntl;
1738 
1739   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg));
1740   if (flg) mumps->id.ICNTL(4) = icntl;
1741   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
1742 
1743   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg));
1744   if (flg) mumps->id.ICNTL(6) = icntl;
1745 
1746   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg));
1747   if (flg) {
1748     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1749     mumps->id.ICNTL(7) = icntl;
1750   }
1751 
1752   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL));
1753   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1754   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL));
1755   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL));
1756   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL));
1757   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL));
1758   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL));
1759   PetscCall(MatGetBlockSizes(A,&rbs,&cbs));
1760   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1761   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg));
1762   if (flg) {
1763     PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled");
1764     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1765   }
1766   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL));
1767   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1768     PetscCall(MatDestroy(&F->schur));
1769     PetscCall(MatMumpsResetSchur_Private(mumps));
1770   }
1771 
1772   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1773      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1774      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1775      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1776      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1777      In short, we could not use distributed RHS with MPICH until v4.0b1.
1778    */
1779 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1780   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1781 #else
1782   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1783 #endif
1784   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg));
1785   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1786 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1787   PetscCheck(!flg || mumps->ICNTL20 != 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1788 #endif
1789   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1790 
1791   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL));
1792   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL));
1793   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
1794   if (mumps->id.ICNTL(24)) {
1795     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1796   }
1797 
1798   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL));
1799   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL));
1800   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL));
1801   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL));
1802   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL));
1803   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1804   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL));
1805   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1806   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL));
1807   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL));
1808   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL));
1809   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL));
1810 
1811   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL));
1812   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL));
1813   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL));
1814   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL));
1815   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL));
1816   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL));
1817 
1818   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1819 
1820   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL));
1821   if (ninfo) {
1822     PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo);
1823     PetscCall(PetscMalloc1(ninfo,&mumps->info));
1824     mumps->ninfo = ninfo;
1825     for (i=0; i<ninfo; i++) {
1826       PetscCheck(info[i] >= 0 && info[i] <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo);
1827       else  mumps->info[i] = info[i];
1828     }
1829   }
1830 
1831   PetscOptionsEnd();
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 PetscErrorCode MatSetFromOptions_MUMPS_OpenMP(Mat F,Mat A)
1836 {
1837   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1838   PetscInt       nthreads=0;
1839 
1840   PetscFunctionBegin;
1841   mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1842   PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size));
1843   PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */
1844 
1845   PetscCall(PetscOptionsHasName(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support));
1846   if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1847   PetscCall(PetscOptionsGetInt(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL));
1848   if (mumps->use_petsc_omp_support) {
1849 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1850     PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl));
1851     PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master));
1852 #else
1853     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
1854 #endif
1855   } else {
1856     mumps->omp_comm      = PETSC_COMM_SELF;
1857     mumps->mumps_comm    = mumps->petsc_comm;
1858     mumps->is_omp_master = PETSC_TRUE;
1859   }
1860   PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size));
1861   mumps->reqs = NULL;
1862   mumps->tag  = 0;
1863 
1864   /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1865   if (mumps->mumps_comm != MPI_COMM_NULL) {
1866     PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
1867   }
1868 
1869   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1870   mumps->id.job = JOB_INIT;
1871   mumps->id.par = 1;  /* host participates factorizaton and solve */
1872   mumps->id.sym = mumps->sym;
1873 
1874   PetscMUMPS_c(mumps);
1875   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1876 
1877   /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1878      For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1879    */
1880   PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm));
1881   PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1882 
1883   mumps->scat_rhs = NULL;
1884   mumps->scat_sol = NULL;
1885 
1886   /* set PETSc-MUMPS default options - override MUMPS default */
1887   mumps->id.ICNTL(3) = 0;
1888   mumps->id.ICNTL(4) = 0;
1889   if (mumps->petsc_size == 1) {
1890     mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1891     mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1892   } else {
1893     mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1894     mumps->id.ICNTL(21) = 1;   /* distributed solution */
1895   }
1896 
1897   /* schur */
1898   mumps->id.size_schur    = 0;
1899   mumps->id.listvar_schur = NULL;
1900   mumps->id.schur         = NULL;
1901   mumps->sizeredrhs       = 0;
1902   mumps->schur_sol        = NULL;
1903   mumps->schur_sizesol    = 0;
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1908 {
1909   PetscFunctionBegin;
1910   if (mumps->id.INFOG(1) < 0) {
1911     if (A->erroriffailure) {
1912       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1913     } else {
1914       if (mumps->id.INFOG(1) == -6) {
1915         PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1916         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1917       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1918         PetscCall(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1919         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1920       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
1921         PetscCall(PetscInfo(F,"Empty matrix\n"));
1922       } else {
1923         PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1924         F->factorerrortype = MAT_FACTOR_OTHER;
1925       }
1926     }
1927   }
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1932 {
1933   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1934   Vec            b;
1935   const PetscInt M = A->rmap->N;
1936 
1937   PetscFunctionBegin;
1938   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1939     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1940     PetscFunctionReturn(0);
1941   }
1942 
1943   /* Set MUMPS options from the options database */
1944   PetscCall(MatSetFromOptions_MUMPS(F,A));
1945 
1946   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1947   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
1948 
1949   /* analysis phase */
1950   /*----------------*/
1951   mumps->id.job = JOB_FACTSYMBOLIC;
1952   mumps->id.n   = M;
1953   switch (mumps->id.ICNTL(18)) {
1954   case 0:  /* centralized assembled matrix input */
1955     if (!mumps->myid) {
1956       mumps->id.nnz = mumps->nnz;
1957       mumps->id.irn = mumps->irn;
1958       mumps->id.jcn = mumps->jcn;
1959       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1960       if (r) {
1961         mumps->id.ICNTL(7) = 1;
1962         if (!mumps->myid) {
1963           const PetscInt *idx;
1964           PetscInt       i;
1965 
1966           PetscCall(PetscMalloc1(M,&mumps->id.perm_in));
1967           PetscCall(ISGetIndices(r,&idx));
1968           for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
1969           PetscCall(ISRestoreIndices(r,&idx));
1970         }
1971       }
1972     }
1973     break;
1974   case 3:  /* distributed assembled matrix input (size>1) */
1975     mumps->id.nnz_loc = mumps->nnz;
1976     mumps->id.irn_loc = mumps->irn;
1977     mumps->id.jcn_loc = mumps->jcn;
1978     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1979     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1980       PetscCall(MatCreateVecs(A,NULL,&b));
1981       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
1982       PetscCall(VecDestroy(&b));
1983     }
1984     break;
1985   }
1986   PetscMUMPS_c(mumps);
1987   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
1988 
1989   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1990   F->ops->solve           = MatSolve_MUMPS;
1991   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
1992   F->ops->matsolve        = MatMatSolve_MUMPS;
1993   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1994 
1995   mumps->matstruc = SAME_NONZERO_PATTERN;
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 /* Note the Petsc r and c permutations are ignored */
2000 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2001 {
2002   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2003   Vec            b;
2004   const PetscInt M = A->rmap->N;
2005 
2006   PetscFunctionBegin;
2007   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2008     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2009     PetscFunctionReturn(0);
2010   }
2011 
2012   /* Set MUMPS options from the options database */
2013   PetscCall(MatSetFromOptions_MUMPS(F,A));
2014 
2015   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2016   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2017 
2018   /* analysis phase */
2019   /*----------------*/
2020   mumps->id.job = JOB_FACTSYMBOLIC;
2021   mumps->id.n   = M;
2022   switch (mumps->id.ICNTL(18)) {
2023   case 0:  /* centralized assembled matrix input */
2024     if (!mumps->myid) {
2025       mumps->id.nnz = mumps->nnz;
2026       mumps->id.irn = mumps->irn;
2027       mumps->id.jcn = mumps->jcn;
2028       if (mumps->id.ICNTL(6)>1) {
2029         mumps->id.a = (MumpsScalar*)mumps->val;
2030       }
2031     }
2032     break;
2033   case 3:  /* distributed assembled matrix input (size>1) */
2034     mumps->id.nnz_loc = mumps->nnz;
2035     mumps->id.irn_loc = mumps->irn;
2036     mumps->id.jcn_loc = mumps->jcn;
2037     if (mumps->id.ICNTL(6)>1) {
2038       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2039     }
2040     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2041       PetscCall(MatCreateVecs(A,NULL,&b));
2042       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2043       PetscCall(VecDestroy(&b));
2044     }
2045     break;
2046   }
2047   PetscMUMPS_c(mumps);
2048   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2049 
2050   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2051   F->ops->solve           = MatSolve_MUMPS;
2052   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2053 
2054   mumps->matstruc = SAME_NONZERO_PATTERN;
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 /* Note the Petsc r permutation and factor info are ignored */
2059 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2060 {
2061   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2062   Vec            b;
2063   const PetscInt M = A->rmap->N;
2064 
2065   PetscFunctionBegin;
2066   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2067     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2068     PetscFunctionReturn(0);
2069   }
2070 
2071   /* Set MUMPS options from the options database */
2072   PetscCall(MatSetFromOptions_MUMPS(F,A));
2073 
2074   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2075   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2076 
2077   /* analysis phase */
2078   /*----------------*/
2079   mumps->id.job = JOB_FACTSYMBOLIC;
2080   mumps->id.n   = M;
2081   switch (mumps->id.ICNTL(18)) {
2082   case 0:  /* centralized assembled matrix input */
2083     if (!mumps->myid) {
2084       mumps->id.nnz = mumps->nnz;
2085       mumps->id.irn = mumps->irn;
2086       mumps->id.jcn = mumps->jcn;
2087       if (mumps->id.ICNTL(6)>1) {
2088         mumps->id.a = (MumpsScalar*)mumps->val;
2089       }
2090     }
2091     break;
2092   case 3:  /* distributed assembled matrix input (size>1) */
2093     mumps->id.nnz_loc = mumps->nnz;
2094     mumps->id.irn_loc = mumps->irn;
2095     mumps->id.jcn_loc = mumps->jcn;
2096     if (mumps->id.ICNTL(6)>1) {
2097       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2098     }
2099     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2100       PetscCall(MatCreateVecs(A,NULL,&b));
2101       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2102       PetscCall(VecDestroy(&b));
2103     }
2104     break;
2105   }
2106   PetscMUMPS_c(mumps);
2107   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2108 
2109   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2110   F->ops->solve                 = MatSolve_MUMPS;
2111   F->ops->solvetranspose        = MatSolve_MUMPS;
2112   F->ops->matsolve              = MatMatSolve_MUMPS;
2113   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2114 #if defined(PETSC_USE_COMPLEX)
2115   F->ops->getinertia = NULL;
2116 #else
2117   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2118 #endif
2119 
2120   mumps->matstruc = SAME_NONZERO_PATTERN;
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2125 {
2126   PetscBool         iascii;
2127   PetscViewerFormat format;
2128   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2129 
2130   PetscFunctionBegin;
2131   /* check if matrix is mumps type */
2132   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2133 
2134   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
2135   if (iascii) {
2136     PetscCall(PetscViewerGetFormat(viewer,&format));
2137     if (format == PETSC_VIEWER_ASCII_INFO) {
2138       PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n"));
2139       PetscCall(PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym));
2140       PetscCall(PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par));
2141       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1)));
2142       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2)));
2143       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3)));
2144       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4)));
2145       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5)));
2146       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6)));
2147       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7)));
2148       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8)));
2149       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10)));
2150       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11)));
2151       if (mumps->id.ICNTL(11)>0) {
2152         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4)));
2153         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5)));
2154         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6)));
2155         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8)));
2156         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9)));
2157         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11)));
2158       }
2159       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12)));
2160       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13)));
2161       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14)));
2162       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (compression of the input matrix):            %d\n",mumps->id.ICNTL(15)));
2163       /* ICNTL(15-17) not used */
2164       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18)));
2165       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19)));
2166       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20)));
2167       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21)));
2168       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22)));
2169       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23)));
2170 
2171       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24)));
2172       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25)));
2173       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26)));
2174       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27)));
2175       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28)));
2176       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29)));
2177 
2178       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30)));
2179       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors is discarded in the solve phase):    %d\n",mumps->id.ICNTL(31)));
2180       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33)));
2181       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35)));
2182       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36)));
2183       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38)));
2184 
2185       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1)));
2186       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2)));
2187       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3)));
2188       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4)));
2189       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5)));
2190       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7)));
2191 
2192       /* information local to each processor */
2193       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n"));
2194       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2195       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1)));
2196       PetscCall(PetscViewerFlush(viewer));
2197       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n"));
2198       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2)));
2199       PetscCall(PetscViewerFlush(viewer));
2200       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n"));
2201       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3)));
2202       PetscCall(PetscViewerFlush(viewer));
2203 
2204       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n"));
2205       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d\n",mumps->myid,mumps->id.INFO(15)));
2206       PetscCall(PetscViewerFlush(viewer));
2207 
2208       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n"));
2209       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16)));
2210       PetscCall(PetscViewerFlush(viewer));
2211 
2212       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n"));
2213       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23)));
2214       PetscCall(PetscViewerFlush(viewer));
2215 
2216       if (mumps->ninfo && mumps->ninfo <= 80) {
2217         PetscInt i;
2218         for (i=0; i<mumps->ninfo; i++) {
2219           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]));
2220           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i])));
2221           PetscCall(PetscViewerFlush(viewer));
2222         }
2223       }
2224       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2225 
2226       if (!mumps->myid) { /* information from the host */
2227         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1)));
2228         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2)));
2229         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3)));
2230         PetscCall(PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34)));
2231 
2232         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3)));
2233         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4)));
2234         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5)));
2235         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6)));
2236         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7)));
2237         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8)));
2238         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9)));
2239         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10)));
2240         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11)));
2241         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12)));
2242         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13)));
2243         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14)));
2244         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15)));
2245         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16)));
2246         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17)));
2247         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18)));
2248         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19)));
2249         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20)));
2250         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21)));
2251         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22)));
2252         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23)));
2253         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24)));
2254         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25)));
2255         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28)));
2256         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29)));
2257         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31)));
2258         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32)));
2259         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33)));
2260         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34)));
2261         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35)));
2262         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36)));
2263         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37)));
2264         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38)));
2265         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39)));
2266       }
2267     }
2268   }
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2273 {
2274   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2275 
2276   PetscFunctionBegin;
2277   info->block_size        = 1.0;
2278   info->nz_allocated      = mumps->id.INFOG(20);
2279   info->nz_used           = mumps->id.INFOG(20);
2280   info->nz_unneeded       = 0.0;
2281   info->assemblies        = 0.0;
2282   info->mallocs           = 0.0;
2283   info->memory            = 0.0;
2284   info->fill_ratio_given  = 0;
2285   info->fill_ratio_needed = 0;
2286   info->factor_mallocs    = 0;
2287   PetscFunctionReturn(0);
2288 }
2289 
2290 /* -------------------------------------------------------------------------------------------*/
2291 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2292 {
2293   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2294   const PetscScalar *arr;
2295   const PetscInt    *idxs;
2296   PetscInt          size,i;
2297 
2298   PetscFunctionBegin;
2299   PetscCall(ISGetLocalSize(is,&size));
2300   if (mumps->petsc_size > 1) {
2301     PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2302 
2303     ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2304     PetscCallMPI(MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm));
2305     PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
2306   }
2307 
2308   /* Schur complement matrix */
2309   PetscCall(MatDestroy(&F->schur));
2310   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur));
2311   PetscCall(MatDenseGetArrayRead(F->schur,&arr));
2312   mumps->id.schur      = (MumpsScalar*)arr;
2313   mumps->id.size_schur = size;
2314   mumps->id.schur_lld  = size;
2315   PetscCall(MatDenseRestoreArrayRead(F->schur,&arr));
2316   if (mumps->sym == 1) {
2317     PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE));
2318   }
2319 
2320   /* MUMPS expects Fortran style indices */
2321   PetscCall(PetscFree(mumps->id.listvar_schur));
2322   PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur));
2323   PetscCall(ISGetIndices(is,&idxs));
2324   for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i])));
2325   PetscCall(ISRestoreIndices(is,&idxs));
2326   if (mumps->petsc_size > 1) {
2327     mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2328   } else {
2329     if (F->factortype == MAT_FACTOR_LU) {
2330       mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2331     } else {
2332       mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2333     }
2334   }
2335   /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */
2336   mumps->id.ICNTL(26) = -1;
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 /* -------------------------------------------------------------------------------------------*/
2341 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2342 {
2343   Mat            St;
2344   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2345   PetscScalar    *array;
2346 #if defined(PETSC_USE_COMPLEX)
2347   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2348 #endif
2349 
2350   PetscFunctionBegin;
2351   PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2352   PetscCall(MatCreate(PETSC_COMM_SELF,&St));
2353   PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur));
2354   PetscCall(MatSetType(St,MATDENSE));
2355   PetscCall(MatSetUp(St));
2356   PetscCall(MatDenseGetArray(St,&array));
2357   if (!mumps->sym) { /* MUMPS always return a full matrix */
2358     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2359       PetscInt i,j,N=mumps->id.size_schur;
2360       for (i=0;i<N;i++) {
2361         for (j=0;j<N;j++) {
2362 #if !defined(PETSC_USE_COMPLEX)
2363           PetscScalar val = mumps->id.schur[i*N+j];
2364 #else
2365           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2366 #endif
2367           array[j*N+i] = val;
2368         }
2369       }
2370     } else { /* stored by columns */
2371       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2372     }
2373   } else { /* either full or lower-triangular (not packed) */
2374     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2375       PetscInt i,j,N=mumps->id.size_schur;
2376       for (i=0;i<N;i++) {
2377         for (j=i;j<N;j++) {
2378 #if !defined(PETSC_USE_COMPLEX)
2379           PetscScalar val = mumps->id.schur[i*N+j];
2380 #else
2381           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2382 #endif
2383           array[i*N+j] = val;
2384           array[j*N+i] = val;
2385         }
2386       }
2387     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2388       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2389     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2390       PetscInt i,j,N=mumps->id.size_schur;
2391       for (i=0;i<N;i++) {
2392         for (j=0;j<i+1;j++) {
2393 #if !defined(PETSC_USE_COMPLEX)
2394           PetscScalar val = mumps->id.schur[i*N+j];
2395 #else
2396           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2397 #endif
2398           array[i*N+j] = val;
2399           array[j*N+i] = val;
2400         }
2401       }
2402     }
2403   }
2404   PetscCall(MatDenseRestoreArray(St,&array));
2405   *S   = St;
2406   PetscFunctionReturn(0);
2407 }
2408 
2409 /* -------------------------------------------------------------------------------------------*/
2410 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2411 {
2412   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2413 
2414   PetscFunctionBegin;
2415   PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl)));
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2420 {
2421   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2422 
2423   PetscFunctionBegin;
2424   *ival = mumps->id.ICNTL(icntl);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 /*@
2429   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2430 
2431    Logically Collective on Mat
2432 
2433    Input Parameters:
2434 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2435 .  icntl - index of MUMPS parameter array ICNTL()
2436 -  ival - value of MUMPS ICNTL(icntl)
2437 
2438   Options Database:
2439 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2440 
2441    Level: beginner
2442 
2443    References:
2444 .  * - MUMPS Users' Guide
2445 
2446 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2447 @*/
2448 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2449 {
2450   PetscFunctionBegin;
2451   PetscValidType(F,1);
2452   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2453   PetscValidLogicalCollectiveInt(F,icntl,2);
2454   PetscValidLogicalCollectiveInt(F,ival,3);
2455   PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 /*@
2460   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2461 
2462    Logically Collective on Mat
2463 
2464    Input Parameters:
2465 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2466 -  icntl - index of MUMPS parameter array ICNTL()
2467 
2468   Output Parameter:
2469 .  ival - value of MUMPS ICNTL(icntl)
2470 
2471    Level: beginner
2472 
2473    References:
2474 .  * - MUMPS Users' Guide
2475 
2476 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2477 @*/
2478 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2479 {
2480   PetscFunctionBegin;
2481   PetscValidType(F,1);
2482   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2483   PetscValidLogicalCollectiveInt(F,icntl,2);
2484   PetscValidIntPointer(ival,3);
2485   PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2486   PetscFunctionReturn(0);
2487 }
2488 
2489 /* -------------------------------------------------------------------------------------------*/
2490 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2491 {
2492   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2493 
2494   PetscFunctionBegin;
2495   mumps->id.CNTL(icntl) = val;
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2500 {
2501   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2502 
2503   PetscFunctionBegin;
2504   *val = mumps->id.CNTL(icntl);
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 /*@
2509   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2510 
2511    Logically Collective on Mat
2512 
2513    Input Parameters:
2514 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2515 .  icntl - index of MUMPS parameter array CNTL()
2516 -  val - value of MUMPS CNTL(icntl)
2517 
2518   Options Database:
2519 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to ival
2520 
2521    Level: beginner
2522 
2523    References:
2524 .  * - MUMPS Users' Guide
2525 
2526 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2527 @*/
2528 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2529 {
2530   PetscFunctionBegin;
2531   PetscValidType(F,1);
2532   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2533   PetscValidLogicalCollectiveInt(F,icntl,2);
2534   PetscValidLogicalCollectiveReal(F,val,3);
2535   PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 /*@
2540   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2541 
2542    Logically Collective on Mat
2543 
2544    Input Parameters:
2545 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2546 -  icntl - index of MUMPS parameter array CNTL()
2547 
2548   Output Parameter:
2549 .  val - value of MUMPS CNTL(icntl)
2550 
2551    Level: beginner
2552 
2553    References:
2554 .  * - MUMPS Users' Guide
2555 
2556 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2557 @*/
2558 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2559 {
2560   PetscFunctionBegin;
2561   PetscValidType(F,1);
2562   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2563   PetscValidLogicalCollectiveInt(F,icntl,2);
2564   PetscValidRealPointer(val,3);
2565   PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2570 {
2571   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2572 
2573   PetscFunctionBegin;
2574   *info = mumps->id.INFO(icntl);
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2579 {
2580   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2581 
2582   PetscFunctionBegin;
2583   *infog = mumps->id.INFOG(icntl);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2588 {
2589   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2590 
2591   PetscFunctionBegin;
2592   *rinfo = mumps->id.RINFO(icntl);
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2597 {
2598   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2599 
2600   PetscFunctionBegin;
2601   *rinfog = mumps->id.RINFOG(icntl);
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2606 {
2607   Mat            Bt = NULL,Btseq = NULL;
2608   PetscBool      flg;
2609   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2610   PetscScalar    *aa;
2611   PetscInt       spnr,*ia,*ja,M,nrhs;
2612 
2613   PetscFunctionBegin;
2614   PetscValidPointer(spRHS,2);
2615   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg));
2616   if (flg) {
2617     PetscCall(MatTransposeGetMat(spRHS,&Bt));
2618   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix");
2619 
2620   PetscCall(MatMumpsSetIcntl(F,30,1));
2621 
2622   if (mumps->petsc_size > 1) {
2623     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2624     Btseq = b->A;
2625   } else {
2626     Btseq = Bt;
2627   }
2628 
2629   PetscCall(MatGetSize(spRHS,&M,&nrhs));
2630   mumps->id.nrhs = nrhs;
2631   mumps->id.lrhs = M;
2632   mumps->id.rhs  = NULL;
2633 
2634   if (!mumps->myid) {
2635     PetscCall(MatSeqAIJGetArray(Btseq,&aa));
2636     PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2637     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2638     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
2639     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2640   } else {
2641     mumps->id.irhs_ptr    = NULL;
2642     mumps->id.irhs_sparse = NULL;
2643     mumps->id.nz_rhs      = 0;
2644     mumps->id.rhs_sparse  = NULL;
2645   }
2646   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2647   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2648 
2649   /* solve phase */
2650   /*-------------*/
2651   mumps->id.job = JOB_SOLVE;
2652   PetscMUMPS_c(mumps);
2653   if (mumps->id.INFOG(1) < 0)
2654     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2655 
2656   if (!mumps->myid) {
2657     PetscCall(MatSeqAIJRestoreArray(Btseq,&aa));
2658     PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2659     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2660   }
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 /*@
2665   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2666 
2667    Logically Collective on Mat
2668 
2669    Input Parameters:
2670 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2671 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2672 
2673   Output Parameter:
2674 . spRHS - requested entries of inverse of A
2675 
2676    Level: beginner
2677 
2678    References:
2679 .  * - MUMPS Users' Guide
2680 
2681 .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2682 @*/
2683 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2684 {
2685   PetscFunctionBegin;
2686   PetscValidType(F,1);
2687   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2688   PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2689   PetscFunctionReturn(0);
2690 }
2691 
2692 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2693 {
2694   Mat            spRHS;
2695 
2696   PetscFunctionBegin;
2697   PetscCall(MatCreateTranspose(spRHST,&spRHS));
2698   PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS));
2699   PetscCall(MatDestroy(&spRHS));
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 /*@
2704   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2705 
2706    Logically Collective on Mat
2707 
2708    Input Parameters:
2709 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2710 -  spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2711 
2712   Output Parameter:
2713 . spRHST - requested entries of inverse of A^T
2714 
2715    Level: beginner
2716 
2717    References:
2718 .  * - MUMPS Users' Guide
2719 
2720 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2721 @*/
2722 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2723 {
2724   PetscBool      flg;
2725 
2726   PetscFunctionBegin;
2727   PetscValidType(F,1);
2728   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2729   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
2730   PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2731 
2732   PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 /*@
2737   MatMumpsGetInfo - Get MUMPS parameter INFO()
2738 
2739    Logically Collective on Mat
2740 
2741    Input Parameters:
2742 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2743 -  icntl - index of MUMPS parameter array INFO()
2744 
2745   Output Parameter:
2746 .  ival - value of MUMPS INFO(icntl)
2747 
2748    Level: beginner
2749 
2750    References:
2751 .  * - MUMPS Users' Guide
2752 
2753 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2754 @*/
2755 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2756 {
2757   PetscFunctionBegin;
2758   PetscValidType(F,1);
2759   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2760   PetscValidIntPointer(ival,3);
2761   PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 /*@
2766   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2767 
2768    Logically Collective on Mat
2769 
2770    Input Parameters:
2771 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2772 -  icntl - index of MUMPS parameter array INFOG()
2773 
2774   Output Parameter:
2775 .  ival - value of MUMPS INFOG(icntl)
2776 
2777    Level: beginner
2778 
2779    References:
2780 .  * - MUMPS Users' Guide
2781 
2782 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2783 @*/
2784 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2785 {
2786   PetscFunctionBegin;
2787   PetscValidType(F,1);
2788   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2789   PetscValidIntPointer(ival,3);
2790   PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 /*@
2795   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2796 
2797    Logically Collective on Mat
2798 
2799    Input Parameters:
2800 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2801 -  icntl - index of MUMPS parameter array RINFO()
2802 
2803   Output Parameter:
2804 .  val - value of MUMPS RINFO(icntl)
2805 
2806    Level: beginner
2807 
2808    References:
2809 .  * - MUMPS Users' Guide
2810 
2811 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2812 @*/
2813 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2814 {
2815   PetscFunctionBegin;
2816   PetscValidType(F,1);
2817   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2818   PetscValidRealPointer(val,3);
2819   PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2820   PetscFunctionReturn(0);
2821 }
2822 
2823 /*@
2824   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2825 
2826    Logically Collective on Mat
2827 
2828    Input Parameters:
2829 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2830 -  icntl - index of MUMPS parameter array RINFOG()
2831 
2832   Output Parameter:
2833 .  val - value of MUMPS RINFOG(icntl)
2834 
2835    Level: beginner
2836 
2837    References:
2838 .  * - MUMPS Users' Guide
2839 
2840 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2841 @*/
2842 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2843 {
2844   PetscFunctionBegin;
2845   PetscValidType(F,1);
2846   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2847   PetscValidRealPointer(val,3);
2848   PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2849   PetscFunctionReturn(0);
2850 }
2851 
2852 /*MC
2853   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2854   distributed and sequential matrices via the external package MUMPS.
2855 
2856   Works with MATAIJ and MATSBAIJ matrices
2857 
2858   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2859 
2860   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2861 
2862   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2863 
2864   Options Database Keys:
2865 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2866 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2867 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2868 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2869 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2870 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2871                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2872 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2873 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2874 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2875 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2876 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2877 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2878 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2879 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2880 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2881 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2882 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
2883 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2884 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2885 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix
2886 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering
2887 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2888 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2889 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2890 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2891 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2892 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2893 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2894 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2895 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2896 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2897 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2898 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2899 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2900 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2901                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2902 
2903   Level: beginner
2904 
2905     Notes:
2906     MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
2907 
2908     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2909     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
2910 
2911     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2912 $          KSPGetPC(ksp,&pc);
2913 $          PCFactorGetMatrix(pc,&mat);
2914 $          MatMumpsGetInfo(mat,....);
2915 $          MatMumpsGetInfog(mat,....); etc.
2916            Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2917 
2918   Using MUMPS with 64-bit integers
2919     MUMPS provides 64-bit integer support in two build modes:
2920       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2921       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2922 
2923       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2924       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2925       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2926       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2927 
2928     With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2929 
2930   Two modes to run MUMPS/PETSc with OpenMP
2931 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2932 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2933 
2934 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2935 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2936 
2937    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2938    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2939    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2940    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2941    (PETSc will automatically try to utilized a threaded BLAS if --with-openmp is provided).
2942 
2943    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2944    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2945    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2946    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
2947    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2948    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
2949    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
2950    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
2951    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
2952    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2953    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2954    examine the mapping result.
2955 
2956    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2957    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2958    calls omp_set_num_threads(m) internally before calling MUMPS.
2959 
2960    References:
2961 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2962 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2963 
2964 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
2965 
2966 M*/
2967 
2968 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2969 {
2970   PetscFunctionBegin;
2971   *type = MATSOLVERMUMPS;
2972   PetscFunctionReturn(0);
2973 }
2974 
2975 /* MatGetFactor for Seq and MPI AIJ matrices */
2976 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2977 {
2978   Mat            B;
2979   Mat_MUMPS      *mumps;
2980   PetscBool      isSeqAIJ;
2981   PetscMPIInt    size;
2982 
2983   PetscFunctionBegin;
2984  #if defined(PETSC_USE_COMPLEX)
2985   PetscCheck(!A->hermitian || A->symmetric || ftype != MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
2986  #endif
2987   /* Create the factorization matrix */
2988   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ));
2989   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
2990   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2991   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
2992   PetscCall(MatSetUp(B));
2993 
2994   PetscCall(PetscNewLog(B,&mumps));
2995 
2996   B->ops->view    = MatView_MUMPS;
2997   B->ops->getinfo = MatGetInfo_MUMPS;
2998 
2999   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3000   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3001   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3002   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3003   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3004   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3005   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3006   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3007   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3008   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3009   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3010   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3011   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3012 
3013   if (ftype == MAT_FACTOR_LU) {
3014     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3015     B->factortype            = MAT_FACTOR_LU;
3016     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3017     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3018     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3019     mumps->sym = 0;
3020   } else {
3021     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3022     B->factortype                  = MAT_FACTOR_CHOLESKY;
3023     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3024     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3025     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3026 #if defined(PETSC_USE_COMPLEX)
3027     mumps->sym = 2;
3028 #else
3029     if (A->spd_set && A->spd) mumps->sym = 1;
3030     else                      mumps->sym = 2;
3031 #endif
3032   }
3033 
3034   /* set solvertype */
3035   PetscCall(PetscFree(B->solvertype));
3036   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3037   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3038   if (size == 1) {
3039     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3040     B->canuseordering = PETSC_TRUE;
3041   }
3042   B->ops->destroy = MatDestroy_MUMPS;
3043   B->data         = (void*)mumps;
3044 
3045   PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A));
3046 
3047   *F = B;
3048   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3049   PetscFunctionReturn(0);
3050 }
3051 
3052 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3053 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3054 {
3055   Mat            B;
3056   Mat_MUMPS      *mumps;
3057   PetscBool      isSeqSBAIJ;
3058   PetscMPIInt    size;
3059 
3060   PetscFunctionBegin;
3061  #if defined(PETSC_USE_COMPLEX)
3062   PetscCheck(!A->hermitian || A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3063  #endif
3064   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3065   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3066   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3067   PetscCall(MatSetUp(B));
3068 
3069   PetscCall(PetscNewLog(B,&mumps));
3070   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ));
3071   if (isSeqSBAIJ) {
3072     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3073   } else {
3074     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3075   }
3076 
3077   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3078   B->ops->view                   = MatView_MUMPS;
3079   B->ops->getinfo                = MatGetInfo_MUMPS;
3080 
3081   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3082   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3083   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3084   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3085   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3086   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3087   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3088   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3089   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3090   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3091   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3092   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3093   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3094 
3095   B->factortype = MAT_FACTOR_CHOLESKY;
3096 #if defined(PETSC_USE_COMPLEX)
3097   mumps->sym = 2;
3098 #else
3099   if (A->spd_set && A->spd) mumps->sym = 1;
3100   else                      mumps->sym = 2;
3101 #endif
3102 
3103   /* set solvertype */
3104   PetscCall(PetscFree(B->solvertype));
3105   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3106   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3107   if (size == 1) {
3108     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3109     B->canuseordering = PETSC_TRUE;
3110   }
3111   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3112   B->ops->destroy = MatDestroy_MUMPS;
3113   B->data         = (void*)mumps;
3114 
3115   PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A));
3116 
3117   *F = B;
3118   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3119   PetscFunctionReturn(0);
3120 }
3121 
3122 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3123 {
3124   Mat            B;
3125   Mat_MUMPS      *mumps;
3126   PetscBool      isSeqBAIJ;
3127   PetscMPIInt    size;
3128 
3129   PetscFunctionBegin;
3130   /* Create the factorization matrix */
3131   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ));
3132   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3133   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3134   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3135   PetscCall(MatSetUp(B));
3136 
3137   PetscCall(PetscNewLog(B,&mumps));
3138   if (ftype == MAT_FACTOR_LU) {
3139     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3140     B->factortype            = MAT_FACTOR_LU;
3141     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3142     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3143     mumps->sym = 0;
3144     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3145   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3146 
3147   B->ops->view        = MatView_MUMPS;
3148   B->ops->getinfo     = MatGetInfo_MUMPS;
3149 
3150   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3151   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3152   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3153   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3154   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3155   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3156   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3157   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3158   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3159   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3160   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3161   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3162   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3163 
3164   /* set solvertype */
3165   PetscCall(PetscFree(B->solvertype));
3166   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3167   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3168   if (size == 1) {
3169     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3170     B->canuseordering = PETSC_TRUE;
3171   }
3172   B->ops->destroy = MatDestroy_MUMPS;
3173   B->data         = (void*)mumps;
3174 
3175   PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A));
3176 
3177   *F = B;
3178   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3179   PetscFunctionReturn(0);
3180 }
3181 
3182 /* MatGetFactor for Seq and MPI SELL matrices */
3183 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3184 {
3185   Mat            B;
3186   Mat_MUMPS      *mumps;
3187   PetscBool      isSeqSELL;
3188   PetscMPIInt    size;
3189 
3190   PetscFunctionBegin;
3191   /* Create the factorization matrix */
3192   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL));
3193   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3194   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3195   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3196   PetscCall(MatSetUp(B));
3197 
3198   PetscCall(PetscNewLog(B,&mumps));
3199 
3200   B->ops->view        = MatView_MUMPS;
3201   B->ops->getinfo     = MatGetInfo_MUMPS;
3202 
3203   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3204   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3205   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3206   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3207   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3208   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3209   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3210   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3211   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3212   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3213   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3214 
3215   if (ftype == MAT_FACTOR_LU) {
3216     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3217     B->factortype            = MAT_FACTOR_LU;
3218     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3219     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3220     mumps->sym = 0;
3221     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3222   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3223 
3224   /* set solvertype */
3225   PetscCall(PetscFree(B->solvertype));
3226   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3227   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3228   if (size == 1) {
3229     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3230     B->canuseordering = PETSC_TRUE;
3231   }
3232   B->ops->destroy = MatDestroy_MUMPS;
3233   B->data         = (void*)mumps;
3234 
3235   PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A));
3236 
3237   *F = B;
3238   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3239   PetscFunctionReturn(0);
3240 }
3241 
3242 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3243 {
3244   PetscFunctionBegin;
3245   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3246   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3247   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3248   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3249   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3250   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3251   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3252   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3253   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3254   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3255   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps));
3256   PetscFunctionReturn(0);
3257 }
3258