xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 4ffacfe27a72f4cdf51b68a3bbb6aed96040fb2f)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
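/* A typical MUMPS lifecycle as driven by this file (see the MUMPS users' guide): JOB_INIT once, then
   JOB_FACTSYMBOLIC (analysis), JOB_FACTNUMERIC (numeric factorization), JOB_SOLVE (possibly many
   times), and finally JOB_END to free MUMPS' internal data. */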
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so checking this macro is the more reliable test */
54     #error "PETSc has not been tested with MUMPS built in full 64-bit mode, so we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
58     #error "PETSc has not been tested with MUMPS built in full 64-bit mode, so we choose to error out"
59   #endif
60 #endif
61 
62 #define MPIU_MUMPSINT             MPI_INT
63 #define PETSC_MUMPS_INT_MAX       2147483647
64 #define PETSC_MUMPS_INT_MIN       -2147483648
65 
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/column indices or other small integers */
67 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69   PetscFunctionBegin;
70 #if PetscDefined(USE_64BIT_INDICES)
71   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
72 #endif
73   *b = (PetscMUMPSInt)(a);
74   PetscFunctionReturn(0);
75 }
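/* Minimal usage sketch (the pattern used throughout this file): convert an index before handing it
   to MUMPS; the cast errors out when the value does not fit in a MUMPS_INT:
     PetscMUMPSInt mrow;
     PetscCall(PetscMUMPSIntCast(i + shift, &mrow));
*/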
76 
77 /* Put these utility routines here since they are only used in this file */
78 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
79 {
80   PetscInt       myval;
81   PetscBool      myset;
82   PetscFunctionBegin;
83   /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
84   PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
85   if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
86   if (set) *set = myset;
87   PetscFunctionReturn(0);
88 }
89 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
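/* Usage sketch; the option shown here is illustrative, the real calls live in MatSetFromOptions_MUMPS()
   inside a PetscOptionsBegin/End block:
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): null pivot row detection","None",
                                    mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
*/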
90 
91 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the set of CPUs the master ranks may run on */
92 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
93 #define PetscMUMPS_c(mumps) \
94   do { \
95     if (mumps->use_petsc_omp_support) { \
96       if (mumps->is_omp_master) { \
97         PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
98         MUMPS_c(&mumps->id); \
99         PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
100       } \
101       PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
102       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is       \
103          specific to each process, so we only Bcast info[1] (an error code) and leave the others alone  \
104          (they have no easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual,   \
105          p. 82. omp_comm is a small shared-memory communicator, hence multiple Bcasts as below are OK.  \
106       */ \
107       PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
108       PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
109       PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
110     } else { \
111       MUMPS_c(&mumps->id); \
112     } \
113   } while (0)
114 #else
115 #define PetscMUMPS_c(mumps) \
116   do { MUMPS_c(&mumps->id); } while (0)
117 #endif
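/* Call pattern used throughout this file: set the job, invoke MUMPS, then check INFOG(1):
     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
*/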
118 
119 /* declare MumpsScalar */
120 #if defined(PETSC_USE_COMPLEX)
121 #if defined(PETSC_USE_REAL_SINGLE)
122 #define MumpsScalar mumps_complex
123 #else
124 #define MumpsScalar mumps_double_complex
125 #endif
126 #else
127 #define MumpsScalar PetscScalar
128 #endif
129 
130 /* macros so that indices match the 1-based MUMPS documentation */
131 #define ICNTL(I) icntl[(I)-1]
132 #define CNTL(I) cntl[(I)-1]
133 #define INFOG(I) infog[(I)-1]
134 #define INFO(I) info[(I)-1]
135 #define RINFOG(I) rinfog[(I)-1]
136 #define RINFO(I) rinfo[(I)-1]
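/* Example: mumps->id.ICNTL(7) = 2 writes icntl[6], i.e. the entry the MUMPS documentation calls
   ICNTL(7); the same 1-based convention applies to CNTL, INFO/INFOG and RINFO/RINFOG. */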
137 
138 typedef struct Mat_MUMPS Mat_MUMPS;
139 struct Mat_MUMPS {
140 #if defined(PETSC_USE_COMPLEX)
141 #if defined(PETSC_USE_REAL_SINGLE)
142   CMUMPS_STRUC_C id;
143 #else
144   ZMUMPS_STRUC_C id;
145 #endif
146 #else
147 #if defined(PETSC_USE_REAL_SINGLE)
148   SMUMPS_STRUC_C id;
149 #else
150   DMUMPS_STRUC_C id;
151 #endif
152 #endif
153 
154   MatStructure   matstruc;
155   PetscMPIInt    myid,petsc_size;
156   PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
157   PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
158   PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
159   PetscMUMPSInt  sym;
160   MPI_Comm       mumps_comm;
161   PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
162   VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
163   PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
164   PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
165 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
166   PetscInt       *rhs_nrow,max_nrhs;
167   PetscMPIInt    *rhs_recvcounts,*rhs_disps;
168   PetscScalar    *rhs_loc,*rhs_recvbuf;
169 #endif
170   Vec            b_seq,x_seq;
171   PetscInt       ninfo,*info;           /* which INFO to display */
172   PetscInt       sizeredrhs;
173   PetscScalar    *schur_sol;
174   PetscInt       schur_sizesol;
175   PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
176   PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
177   PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
178 
179   /* stuff used by petsc/mumps OpenMP support */
180   PetscBool      use_petsc_omp_support;
181   PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller guaranteeing that blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
182   MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
183   PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
184   PetscMPIInt    tag,omp_comm_size;
185   PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
186   MPI_Request    *reqs;
187 };
188 
189 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
190    Here, nrow is the number of rows, ia[] is the row pointer, and ja[] holds the column indices.
191  */
192 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
193 {
194   PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices; use PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_USE_64BIT_INDICES)
198   {
199     PetscInt i;
200     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201       PetscCall(PetscFree(mumps->ia_alloc));
202       PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
203       mumps->cur_ilen = nrow+1;
204     }
205     if (nnz > mumps->cur_jlen) {
206       PetscCall(PetscFree(mumps->ja_alloc));
207       PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
208       mumps->cur_jlen = nnz;
209     }
210     for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
211     for (i=0; i<nnz; i++)    PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
212     *ia_mumps = mumps->ia_alloc;
213     *ja_mumps = mumps->ja_alloc;
214   }
215 #else
216   *ia_mumps = ia;
217   *ja_mumps = ja;
218 #endif
219   PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
220   PetscFunctionReturn(0);
221 }
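/* Usage sketch (see MatMatSolve_MUMPS() below), handing a 1-based CSR describing a sparse RHS to MUMPS:
     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
*/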
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224 {
225   PetscFunctionBegin;
226   PetscCall(PetscFree(mumps->id.listvar_schur));
227   PetscCall(PetscFree(mumps->id.redrhs));
228   PetscCall(PetscFree(mumps->schur_sol));
229   mumps->id.size_schur = 0;
230   mumps->id.schur_lld  = 0;
231   mumps->id.ICNTL(19)  = 0;
232   PetscFunctionReturn(0);
233 }
234 
235 /* solve with rhs in mumps->id.redrhs and return in the same location */
236 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
237 {
238   Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
239   Mat                  S,B,X;
240   MatFactorSchurStatus schurstatus;
241   PetscInt             sizesol;
242 
243   PetscFunctionBegin;
244   PetscCall(MatFactorFactorizeSchurComplement(F));
245   PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
246   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
247   PetscCall(MatSetType(B,((PetscObject)S)->type_name));
248 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
249   PetscCall(MatBindToCPU(B,S->boundtocpu));
250 #endif
251   switch (schurstatus) {
252   case MAT_FACTOR_SCHUR_FACTORED:
253     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
254     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
255 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
256     PetscCall(MatBindToCPU(X,S->boundtocpu));
257 #endif
258     if (!mumps->id.ICNTL(9)) { /* transpose solve */
259       PetscCall(MatMatSolveTranspose(S,B,X));
260     } else {
261       PetscCall(MatMatSolve(S,B,X));
262     }
263     break;
264   case MAT_FACTOR_SCHUR_INVERTED:
265     sizesol = mumps->id.nrhs*mumps->id.size_schur;
266     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
267       PetscCall(PetscFree(mumps->schur_sol));
268       PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
269       mumps->schur_sizesol = sizesol;
270     }
271     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
272     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
274     PetscCall(MatBindToCPU(X,S->boundtocpu));
275 #endif
276     PetscCall(MatProductCreateWithMat(S,B,NULL,X));
277     if (!mumps->id.ICNTL(9)) { /* transpose solve */
278       PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
279     } else {
280       PetscCall(MatProductSetType(X,MATPRODUCT_AB));
281     }
282     PetscCall(MatProductSetFromOptions(X));
283     PetscCall(MatProductSymbolic(X));
284     PetscCall(MatProductNumeric(X));
285 
286     PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
287     break;
288   default:
289     SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
290   }
291   PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
292   PetscCall(MatDestroy(&B));
293   PetscCall(MatDestroy(&X));
294   PetscFunctionReturn(0);
295 }
296 
297 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
298 {
299   Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
300 
301   PetscFunctionBegin;
302   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
303     PetscFunctionReturn(0);
304   }
305   if (!expansion) { /* prepare for the condensation step */
306     PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
307     /* allocate MUMPS internal array to store reduced right-hand sides */
308     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
309       PetscCall(PetscFree(mumps->id.redrhs));
310       mumps->id.lredrhs = mumps->id.size_schur;
311       PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
312       mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
313     }
314     mumps->id.ICNTL(26) = 1; /* condensation phase */
315   } else { /* prepare for the expansion step */
316     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
317     PetscCall(MatMumpsSolveSchur_Private(F));
318     mumps->id.ICNTL(26) = 2; /* expansion phase */
319     PetscMUMPS_c(mumps);
320     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
321     /* restore defaults */
322     mumps->id.ICNTL(26) = -1;
323     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
324     if (mumps->id.nrhs > 1) {
325       PetscCall(PetscFree(mumps->id.redrhs));
326       mumps->id.lredrhs = 0;
327       mumps->sizeredrhs = 0;
328     }
329   }
330   PetscFunctionReturn(0);
331 }
332 
333 /*
334   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
335 
336   input:
337     A       - matrix in aij, baij, or sbaij format
338     shift   - 0: C style output triple; 1: Fortran style output triple.
339     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
340               MAT_REUSE_MATRIX:   only the values in v array are updated
341   output:
342     nnz     - dim of r, c, and v (number of local nonzero entries of A)
343     r, c, v - row and col index, matrix values (matrix triples)
344 
345 The returned values r, c, and sometimes v are obtained in a single PetscMalloc2()/PetscMalloc1(). In MatDestroy_MUMPS() they are
346 freed with PetscFree2(mumps->irn,mumps->jcn). This is not ideal code: the fact that v is ONLY sometimes allocated together with
347 r and c means that the allocation cannot easily be replaced with a single PetscMalloc3().
348 
349  */
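/* Worked example (Fortran-style, shift=1): the 2x2 AIJ matrix [1.0 2.0; 0.0 3.0] has nz=3 and is
   converted to the triples r = {1,1,2}, c = {1,2,2}, v = {1.0,2.0,3.0}. */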
350 
351 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
352 {
353   const PetscScalar *av;
354   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
355   PetscInt64        nz,rnz,i,j,k;
356   PetscMUMPSInt     *row,*col;
357   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
358 
359   PetscFunctionBegin;
360   PetscCall(MatSeqAIJGetArrayRead(A,&av));
361   mumps->val = (PetscScalar*)av;
362   if (reuse == MAT_INITIAL_MATRIX) {
363     nz   = aa->nz;
364     ai   = aa->i;
365     aj   = aa->j;
366     PetscCall(PetscMalloc2(nz,&row,nz,&col));
367     for (i=k=0; i<M; i++) {
368       rnz = ai[i+1] - ai[i];
369       ajj = aj + ai[i];
370       for (j=0; j<rnz; j++) {
371         PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
372         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
373         k++;
374       }
375     }
376     mumps->irn = row;
377     mumps->jcn = col;
378     mumps->nnz = nz;
379   }
380   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
381   PetscFunctionReturn(0);
382 }
383 
384 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
385 {
386   PetscInt64     nz,i,j,k,r;
387   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
388   PetscMUMPSInt  *row,*col;
389 
390   PetscFunctionBegin;
391   mumps->val = a->val;
392   if (reuse == MAT_INITIAL_MATRIX) {
393     nz   = a->sliidx[a->totalslices];
394     PetscCall(PetscMalloc2(nz,&row,nz,&col));
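    /* SELL stores the matrix in slices of 8 rows: entry j of slice i belongs to row 8*i + r, where r
       cycles through 0..7, hence the (r+1)&0x07 update below */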
395     for (i=k=0; i<a->totalslices; i++) {
396       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
397         PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
398       }
399     }
400     for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
401     mumps->irn = row;
402     mumps->jcn = col;
403     mumps->nnz = nz;
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
409 {
410   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
411   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
412   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
413   PetscInt       bs;
414   PetscMUMPSInt  *row,*col;
415 
416   PetscFunctionBegin;
417   PetscCall(MatGetBlockSize(A,&bs));
418   M          = A->rmap->N/bs;
419   mumps->val = aa->a;
420   if (reuse == MAT_INITIAL_MATRIX) {
421     ai   = aa->i; aj = aa->j;
422     nz   = bs2*aa->nz;
423     PetscCall(PetscMalloc2(nz,&row,nz,&col));
424     for (i=0; i<M; i++) {
425       ajj = aj + ai[i];
426       rnz = ai[i+1] - ai[i];
427       for (k=0; k<rnz; k++) {
428         for (j=0; j<bs; j++) {
429           for (m=0; m<bs; m++) {
430             PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
431             PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
432             idx++;
433           }
434         }
435       }
436     }
437     mumps->irn = row;
438     mumps->jcn = col;
439     mumps->nnz = nz;
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
445 {
446   const PetscInt *ai, *aj,*ajj;
447   PetscInt        bs;
448   PetscInt64      nz,rnz,i,j,k,m;
449   PetscMUMPSInt   *row,*col;
450   PetscScalar     *val;
451   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
452   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
453 #if defined(PETSC_USE_COMPLEX)
454   PetscBool       hermitian;
455 #endif
456 
457   PetscFunctionBegin;
458 #if defined(PETSC_USE_COMPLEX)
459   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
460   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
461 #endif
462   ai   = aa->i;
463   aj   = aa->j;
464   PetscCall(MatGetBlockSize(A,&bs));
465   if (reuse == MAT_INITIAL_MATRIX) {
466     nz   = aa->nz;
467     PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
468     if (bs>1) {
469       PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
470       mumps->val = mumps->val_alloc;
471     } else {
472       mumps->val = aa->a;
473     }
474     mumps->irn = row;
475     mumps->jcn = col;
476   } else {
477     if (bs == 1) mumps->val = aa->a;
478     row = mumps->irn;
479     col = mumps->jcn;
480   }
481   val = mumps->val;
482 
483   nz = 0;
484   if (bs>1) {
485     for (i=0; i<mbs; i++) {
486       rnz = ai[i+1] - ai[i];
487       ajj = aj + ai[i];
488       for (j=0; j<rnz; j++) {
489         for (k=0; k<bs; k++) {
490           for (m=0; m<bs; m++) {
491             if (ajj[j]>i || k>=m) {
492               if (reuse == MAT_INITIAL_MATRIX) {
493                 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
494                 PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
495               }
496               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
497             }
498           }
499         }
500       }
501     }
502   } else if (reuse == MAT_INITIAL_MATRIX) {
503     for (i=0; i<mbs; i++) {
504       rnz = ai[i+1] - ai[i];
505       ajj = aj + ai[i];
506       for (j=0; j<rnz; j++) {
507         PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
508         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
509         nz++;
510       }
511     }
512     PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
513   }
514   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
515   PetscFunctionReturn(0);
516 }
517 
518 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
519 {
520   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
521   PetscInt64        nz,rnz,i,j;
522   const PetscScalar *av,*v1;
523   PetscScalar       *val;
524   PetscMUMPSInt     *row,*col;
525   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
526   PetscBool         missing;
527 #if defined(PETSC_USE_COMPLEX)
528   PetscBool         hermitian;
529 #endif
530 
531   PetscFunctionBegin;
532 #if defined(PETSC_USE_COMPLEX)
533   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
534   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
535 #endif
536   PetscCall(MatSeqAIJGetArrayRead(A,&av));
537   ai    = aa->i; aj = aa->j;
538   adiag = aa->diag;
539   PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
540   if (reuse == MAT_INITIAL_MATRIX) {
541     /* count nz in the upper triangular part of A */
542     nz = 0;
543     if (missing) {
544       for (i=0; i<M; i++) {
545         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
546           for (j=ai[i];j<ai[i+1];j++) {
547             if (aj[j] < i) continue;
548             nz++;
549           }
550         } else {
551           nz += ai[i+1] - adiag[i];
552         }
553       }
554     } else {
555       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
556     }
557     PetscCall(PetscMalloc2(nz,&row,nz,&col));
558     PetscCall(PetscMalloc1(nz,&val));
559     mumps->nnz = nz;
560     mumps->irn = row;
561     mumps->jcn = col;
562     mumps->val = mumps->val_alloc = val;
563 
564     nz = 0;
565     if (missing) {
566       for (i=0; i<M; i++) {
567         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
568           for (j=ai[i];j<ai[i+1];j++) {
569             if (aj[j] < i) continue;
570             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
571             PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
572             val[nz] = av[j];
573             nz++;
574           }
575         } else {
576           rnz = ai[i+1] - adiag[i];
577           ajj = aj + adiag[i];
578           v1  = av + adiag[i];
579           for (j=0; j<rnz; j++) {
580             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
581             PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
582             val[nz++] = v1[j];
583           }
584         }
585       }
586     } else {
587       for (i=0; i<M; i++) {
588         rnz = ai[i+1] - adiag[i];
589         ajj = aj + adiag[i];
590         v1  = av + adiag[i];
591         for (j=0; j<rnz; j++) {
592           PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
593           PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
594           val[nz++] = v1[j];
595         }
596       }
597     }
598   } else {
599     nz = 0;
600     val = mumps->val;
601     if (missing) {
602       for (i=0; i <M; i++) {
603         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
604           for (j=ai[i];j<ai[i+1];j++) {
605             if (aj[j] < i) continue;
606             val[nz++] = av[j];
607           }
608         } else {
609           rnz = ai[i+1] - adiag[i];
610           v1  = av + adiag[i];
611           for (j=0; j<rnz; j++) {
612             val[nz++] = v1[j];
613           }
614         }
615       }
616     } else {
617       for (i=0; i <M; i++) {
618         rnz = ai[i+1] - adiag[i];
619         v1  = av + adiag[i];
620         for (j=0; j<rnz; j++) {
621           val[nz++] = v1[j];
622         }
623       }
624     }
625   }
626   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
627   PetscFunctionReturn(0);
628 }
629 
630 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
631 {
632   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
633   PetscInt          bs;
634   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
635   PetscMUMPSInt     *row,*col;
636   const PetscScalar *av,*bv,*v1,*v2;
637   PetscScalar       *val;
638   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
639   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
640   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
641   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
642 #if defined(PETSC_USE_COMPLEX)
643   PetscBool         hermitian;
644 #endif
645 
646   PetscFunctionBegin;
647 #if defined(PETSC_USE_COMPLEX)
648   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
649   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
650 #endif
651   PetscCall(MatGetBlockSize(A,&bs));
652   rstart = A->rmap->rstart;
653   ai = aa->i;
654   aj = aa->j;
655   bi = bb->i;
656   bj = bb->j;
657   av = aa->a;
658   bv = bb->a;
659 
660   garray = mat->garray;
661 
662   if (reuse == MAT_INITIAL_MATRIX) {
663     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
664     PetscCall(PetscMalloc2(nz,&row,nz,&col));
665     PetscCall(PetscMalloc1(nz,&val));
666     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
667     mumps->irn = row;
668     mumps->jcn = col;
669     mumps->val = mumps->val_alloc = val;
670   } else {
671     val = mumps->val;
672   }
673 
674   jj = 0; irow = rstart;
675   for (i=0; i<mbs; i++) {
676     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
677     countA = ai[i+1] - ai[i];
678     countB = bi[i+1] - bi[i];
679     bjj    = bj + bi[i];
680     v1     = av + ai[i]*bs2;
681     v2     = bv + bi[i]*bs2;
682 
683     if (bs>1) {
684       /* A-part */
685       for (j=0; j<countA; j++) {
686         for (k=0; k<bs; k++) {
687           for (m=0; m<bs; m++) {
688             if (rstart + ajj[j]*bs>irow || k>=m) {
689               if (reuse == MAT_INITIAL_MATRIX) {
690                 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
691                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
692               }
693               val[jj++] = v1[j*bs2 + m + k*bs];
694             }
695           }
696         }
697       }
698 
699       /* B-part */
700       for (j=0; j < countB; j++) {
701         for (k=0; k<bs; k++) {
702           for (m=0; m<bs; m++) {
703             if (reuse == MAT_INITIAL_MATRIX) {
704               PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
705               PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
706             }
707             val[jj++] = v2[j*bs2 + m + k*bs];
708           }
709         }
710       }
711     } else {
712       /* A-part */
713       for (j=0; j<countA; j++) {
714         if (reuse == MAT_INITIAL_MATRIX) {
715           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
716           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
717         }
718         val[jj++] = v1[j];
719       }
720 
721       /* B-part */
722       for (j=0; j < countB; j++) {
723         if (reuse == MAT_INITIAL_MATRIX) {
724           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
725           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
726         }
727         val[jj++] = v2[j];
728       }
729     }
730     irow+=bs;
731   }
732   mumps->nnz = jj;
733   PetscFunctionReturn(0);
734 }
735 
736 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
737 {
738   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
739   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
740   PetscMUMPSInt     *row,*col;
741   const PetscScalar *av, *bv,*v1,*v2;
742   PetscScalar       *val;
743   Mat               Ad,Ao;
744   Mat_SeqAIJ        *aa;
745   Mat_SeqAIJ        *bb;
746 
747   PetscFunctionBegin;
748   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
749   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
750   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
751 
752   aa = (Mat_SeqAIJ*)(Ad)->data;
753   bb = (Mat_SeqAIJ*)(Ao)->data;
754   ai = aa->i;
755   aj = aa->j;
756   bi = bb->i;
757   bj = bb->j;
758 
759   rstart = A->rmap->rstart;
760 
761   if (reuse == MAT_INITIAL_MATRIX) {
762     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
763     PetscCall(PetscMalloc2(nz,&row,nz,&col));
764     PetscCall(PetscMalloc1(nz,&val));
765     mumps->nnz = nz;
766     mumps->irn = row;
767     mumps->jcn = col;
768     mumps->val = mumps->val_alloc = val;
769   } else {
770     val = mumps->val;
771   }
772 
773   jj = 0; irow = rstart;
774   for (i=0; i<m; i++) {
775     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
776     countA = ai[i+1] - ai[i];
777     countB = bi[i+1] - bi[i];
778     bjj    = bj + bi[i];
779     v1     = av + ai[i];
780     v2     = bv + bi[i];
781 
782     /* A-part */
783     for (j=0; j<countA; j++) {
784       if (reuse == MAT_INITIAL_MATRIX) {
785         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
786         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
787       }
788       val[jj++] = v1[j];
789     }
790 
791     /* B-part */
792     for (j=0; j < countB; j++) {
793       if (reuse == MAT_INITIAL_MATRIX) {
794         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
795         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
796       }
797       val[jj++] = v2[j];
798     }
799     irow++;
800   }
801   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
802   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
803   PetscFunctionReturn(0);
804 }
805 
806 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
807 {
808   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
809   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
810   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
811   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
812   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
813   const PetscInt    bs2=mat->bs2;
814   PetscInt          bs;
815   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
816   PetscMUMPSInt     *row,*col;
817   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
818   PetscScalar       *val;
819 
820   PetscFunctionBegin;
821   PetscCall(MatGetBlockSize(A,&bs));
822   if (reuse == MAT_INITIAL_MATRIX) {
823     nz   = bs2*(aa->nz + bb->nz);
824     PetscCall(PetscMalloc2(nz,&row,nz,&col));
825     PetscCall(PetscMalloc1(nz,&val));
826     mumps->nnz = nz;
827     mumps->irn = row;
828     mumps->jcn = col;
829     mumps->val = mumps->val_alloc = val;
830   } else {
831     val = mumps->val;
832   }
833 
834   jj = 0; irow = rstart;
835   for (i=0; i<mbs; i++) {
836     countA = ai[i+1] - ai[i];
837     countB = bi[i+1] - bi[i];
838     ajj    = aj + ai[i];
839     bjj    = bj + bi[i];
840     v1     = av + bs2*ai[i];
841     v2     = bv + bs2*bi[i];
842 
843     idx = 0;
844     /* A-part */
845     for (k=0; k<countA; k++) {
846       for (j=0; j<bs; j++) {
847         for (n=0; n<bs; n++) {
848           if (reuse == MAT_INITIAL_MATRIX) {
849             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
850             PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
851           }
852           val[jj++] = v1[idx++];
853         }
854       }
855     }
856 
857     idx = 0;
858     /* B-part */
859     for (k=0; k<countB; k++) {
860       for (j=0; j<bs; j++) {
861         for (n=0; n<bs; n++) {
862           if (reuse == MAT_INITIAL_MATRIX) {
863             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
864             PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
865           }
866           val[jj++] = v2[idx++];
867         }
868       }
869     }
870     irow += bs;
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
876 {
877   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
878   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
879   PetscMUMPSInt     *row,*col;
880   const PetscScalar *av, *bv,*v1,*v2;
881   PetscScalar       *val;
882   Mat               Ad,Ao;
883   Mat_SeqAIJ        *aa;
884   Mat_SeqAIJ        *bb;
885 #if defined(PETSC_USE_COMPLEX)
886   PetscBool         hermitian;
887 #endif
888 
889   PetscFunctionBegin;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
892   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
893 #endif
894   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
895   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
896   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
897 
898   aa    = (Mat_SeqAIJ*)(Ad)->data;
899   bb    = (Mat_SeqAIJ*)(Ao)->data;
900   ai    = aa->i;
901   aj    = aa->j;
902   adiag = aa->diag;
903   bi    = bb->i;
904   bj    = bb->j;
905 
906   rstart = A->rmap->rstart;
907 
908   if (reuse == MAT_INITIAL_MATRIX) {
909     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
910     nzb = 0;    /* num of upper triangular entries in mat->B */
911     for (i=0; i<m; i++) {
912       nza   += (ai[i+1] - adiag[i]);
913       countB = bi[i+1] - bi[i];
914       bjj    = bj + bi[i];
915       for (j=0; j<countB; j++) {
916         if (garray[bjj[j]] > rstart) nzb++;
917       }
918     }
919 
920     nz   = nza + nzb; /* total nz of upper triangular part of mat */
921     PetscCall(PetscMalloc2(nz,&row,nz,&col));
922     PetscCall(PetscMalloc1(nz,&val));
923     mumps->nnz = nz;
924     mumps->irn = row;
925     mumps->jcn = col;
926     mumps->val = mumps->val_alloc = val;
927   } else {
928     val = mumps->val;
929   }
930 
931   jj = 0; irow = rstart;
932   for (i=0; i<m; i++) {
933     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
934     v1     = av + adiag[i];
935     countA = ai[i+1] - adiag[i];
936     countB = bi[i+1] - bi[i];
937     bjj    = bj + bi[i];
938     v2     = bv + bi[i];
939 
940     /* A-part */
941     for (j=0; j<countA; j++) {
942       if (reuse == MAT_INITIAL_MATRIX) {
943         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
944         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
945       }
946       val[jj++] = v1[j];
947     }
948 
949     /* B-part */
950     for (j=0; j < countB; j++) {
951       if (garray[bjj[j]] > rstart) {
952         if (reuse == MAT_INITIAL_MATRIX) {
953           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
954           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
955         }
956         val[jj++] = v2[j];
957       }
958     }
959     irow++;
960   }
961   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
962   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatDestroy_MUMPS(Mat A)
967 {
968   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
969 
970   PetscFunctionBegin;
971   PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
972   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
973   PetscCall(VecScatterDestroy(&mumps->scat_sol));
974   PetscCall(VecDestroy(&mumps->b_seq));
975   PetscCall(VecDestroy(&mumps->x_seq));
976   PetscCall(PetscFree(mumps->id.perm_in));
977   PetscCall(PetscFree2(mumps->irn,mumps->jcn));
978   PetscCall(PetscFree(mumps->val_alloc));
979   PetscCall(PetscFree(mumps->info));
980   PetscCall(MatMumpsResetSchur_Private(mumps));
981   mumps->id.job = JOB_END;
982   PetscMUMPS_c(mumps);
983   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
984 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
985   if (mumps->use_petsc_omp_support) {
986     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
987     PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
988     PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
989   }
990 #endif
991   PetscCall(PetscFree(mumps->ia_alloc));
992   PetscCall(PetscFree(mumps->ja_alloc));
993   PetscCall(PetscFree(mumps->recvcount));
994   PetscCall(PetscFree(mumps->reqs));
995   PetscCall(PetscFree(mumps->irhs_loc));
996   if (mumps->mumps_comm != MPI_COMM_NULL) PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
997   PetscCall(PetscFree(A->data));
998 
999   /* clear composed functions */
1000   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1001   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
1002   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
1003   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
1004   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
1005   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
1006   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
1007   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
1008   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
1009   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
1010   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
1011   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
1012   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of right-hand sides. <array> points to the start of the RHS values on the local process. */
1017 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1018 {
1019   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1020   const PetscMPIInt  ompsize=mumps->omp_comm_size;
1021   PetscInt           i,m,M,rstart;
1022 
1023   PetscFunctionBegin;
1024   PetscCall(MatGetSize(A,&M,NULL));
1025   PetscCall(MatGetLocalSize(A,&m,NULL));
1026   PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1027   if (ompsize == 1) {
1028     if (!mumps->irhs_loc) {
1029       mumps->nloc_rhs = m;
1030       PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
1031       PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1032       for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1033     }
1034     mumps->id.rhs_loc = (MumpsScalar*)array;
1035   } else {
1036   #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1037     const PetscInt  *ranges;
1038     PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
1039     MPI_Group       petsc_group,omp_group;
1040     PetscScalar     *recvbuf=NULL;
1041 
1042     if (mumps->is_omp_master) {
1043       /* Lazily initialize the omp stuff for distributed rhs */
1044       if (!mumps->irhs_loc) {
1045         PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
1046         PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
1047         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
1048         PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
1049         for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1050         PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));
1051 
1052         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1053         mumps->nloc_rhs = 0;
1054         PetscCall(MatGetOwnershipRanges(A,&ranges));
1055         for (j=0; j<ompsize; j++) {
1056           mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1057           mumps->nloc_rhs   += mumps->rhs_nrow[j];
1058         }
1059         PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
1060         for (j=k=0; j<ompsize; j++) {
1061           for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1062         }
1063 
1064         PetscCall(PetscFree2(omp_ranks,petsc_ranks));
1065         PetscCallMPI(MPI_Group_free(&petsc_group));
1066         PetscCallMPI(MPI_Group_free(&omp_group));
1067       }
1068 
1069       /* Realloc buffers when the current nrhs is bigger than any we have met before */
1070       if (nrhs > mumps->max_nrhs) {
1071         PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
1072         PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
1073         mumps->max_nrhs = nrhs;
1074       }
1075 
1076       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1077       for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
1078       mumps->rhs_disps[0] = 0;
1079       for (j=1; j<ompsize; j++) {
1080         mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1081         PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1082       }
1083       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1084     }
1085 
1086     PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
1087     PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));
1088 
1089     if (mumps->is_omp_master) {
1090       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
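        /* Layout sketch with hypothetical sizes ompsize=2, nrhs=2, rhs_nrow={2,3}: the gathered
           rhs_recvbuf = [a1 a2 A1 A2 | b1 b2 b3 B1 B2 B3] (one column-major block per rank) becomes
           rhs_loc = [a1 a2 b1 b2 b3 A1 A2 B1 B2 B3], i.e. column-major with leading dimension
           nloc_rhs = 5, which is what MUMPS expects for a distributed dense RHS. */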
1091         PetscScalar *dst,*dstbase = mumps->rhs_loc;
1092         for (j=0; j<ompsize; j++) {
1093           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1094           dst = dstbase;
1095           for (i=0; i<nrhs; i++) {
1096             PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
1097             src += mumps->rhs_nrow[j];
1098             dst += mumps->nloc_rhs;
1099           }
1100           dstbase += mumps->rhs_nrow[j];
1101         }
1102       }
1103       mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1104     }
1105   #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1106   }
1107   mumps->id.nrhs     = nrhs;
1108   mumps->id.nloc_rhs = mumps->nloc_rhs;
1109   mumps->id.lrhs_loc = mumps->nloc_rhs;
1110   mumps->id.irhs_loc = mumps->irhs_loc;
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1115 {
1116   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1117   const PetscScalar  *rarray = NULL;
1118   PetscScalar        *array;
1119   IS                 is_iden,is_petsc;
1120   PetscInt           i;
1121   PetscBool          second_solve = PETSC_FALSE;
1122   static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1123 
1124   PetscFunctionBegin;
1125   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1));
1126   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2));
1127 
1128   if (A->factorerrortype) {
1129     PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1130     PetscCall(VecSetInf(x));
1131     PetscFunctionReturn(0);
1132   }
1133 
1134   mumps->id.nrhs = 1;
1135   if (mumps->petsc_size > 1) {
1136     if (mumps->ICNTL20 == 10) {
1137       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1138       PetscCall(VecGetArrayRead(b,&rarray));
1139       PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
1140     } else {
1141       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1142       PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1143       PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1144       if (!mumps->myid) {
1145         PetscCall(VecGetArray(mumps->b_seq,&array));
1146         mumps->id.rhs = (MumpsScalar*)array;
1147       }
1148     }
1149   } else {  /* petsc_size == 1 */
1150     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1151     PetscCall(VecCopy(b,x));
1152     PetscCall(VecGetArray(x,&array));
1153     mumps->id.rhs = (MumpsScalar*)array;
1154   }
1155 
1156   /*
1157      handle condensation step of Schur complement (if any)
1158      By default we set ICNTL(26) == -1 when Schur indices have been provided by the user.
1159      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1160      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1161      This requires an extra call to PetscMUMPS_c and the computation of the factors of S.
1162   */
1163   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1164     PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1165     second_solve = PETSC_TRUE;
1166     PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1167   }
1168   /* solve phase */
1169   /*-------------*/
1170   mumps->id.job = JOB_SOLVE;
1171   PetscMUMPS_c(mumps);
1172   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1173 
1174   /* handle expansion step of Schur complement (if any) */
1175   if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1176 
1177   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1178     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1179       /* when id.ICNTL(9) changes, the contents of isol_loc/sol_loc may change (though not their size lsol_loc), so recreate scat_sol */
1180       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1181     }
1182     if (!mumps->scat_sol) { /* create scatter scat_sol */
1183       PetscInt *isol2_loc=NULL;
1184       PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
1185       PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
1186       for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1187       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc));  /* to */
1188       PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
1189       PetscCall(ISDestroy(&is_iden));
1190       PetscCall(ISDestroy(&is_petsc));
1191       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1192     }
1193 
1194     PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1195     PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1196   }
1197 
1198   if (mumps->petsc_size > 1) {
1199     if (mumps->ICNTL20 == 10) {
1200       PetscCall(VecRestoreArrayRead(b,&rarray));
1201     } else if (!mumps->myid) {
1202       PetscCall(VecRestoreArray(mumps->b_seq,&array));
1203     }
1204   } else PetscCall(VecRestoreArray(x,&array));
1205 
1206   PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1211 {
1212   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1213 
1214   PetscFunctionBegin;
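  /* MUMPS ICNTL(9): 1 (the default) solves A x = b; any other value solves A^T x = b */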
1215   mumps->id.ICNTL(9) = 0;
1216   PetscCall(MatSolve_MUMPS(A,b,x));
1217   mumps->id.ICNTL(9) = 1;
1218   PetscFunctionReturn(0);
1219 }
1220 
1221 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1222 {
1223   Mat               Bt = NULL;
1224   PetscBool         denseX,denseB,flg,flgT;
1225   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1226   PetscInt          i,nrhs,M;
1227   PetscScalar       *array;
1228   const PetscScalar *rbray;
1229   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1230   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1231   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1232   IS                is_to,is_from;
1233   PetscInt          k,proc,j,m,myrstart;
1234   const PetscInt    *rstart;
1235   Vec               v_mpi,msol_loc;
1236   VecScatter        scat_sol;
1237   Vec               b_seq;
1238   VecScatter        scat_rhs;
1239   PetscScalar       *aa;
1240   PetscInt          spnr,*ia,*ja;
1241   Mat_MPIAIJ        *b = NULL;
1242 
1243   PetscFunctionBegin;
1244   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
1245   PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1246 
1247   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
1248   if (denseB) {
1249     PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1250     mumps->id.ICNTL(20)= 0; /* dense RHS */
1251   } else { /* sparse B */
1252     PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1253     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
1254     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1255                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1256       PetscCall(MatTransposeGetMat(B,&Bt));
1257     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1258     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1259   }
1260 
1261   PetscCall(MatGetSize(B,&M,&nrhs));
1262   mumps->id.nrhs = nrhs;
1263   mumps->id.lrhs = M;
1264   mumps->id.rhs  = NULL;
1265 
1266   if (mumps->petsc_size == 1) {
1267     PetscScalar *aa;
1268     PetscInt    spnr,*ia,*ja;
1269     PetscBool   second_solve = PETSC_FALSE;
1270 
1271     PetscCall(MatDenseGetArray(X,&array));
1272     mumps->id.rhs = (MumpsScalar*)array;
1273 
1274     if (denseB) {
1275       /* copy B to X */
1276       PetscCall(MatDenseGetArrayRead(B,&rbray));
1277       PetscCall(PetscArraycpy(array,rbray,M*nrhs));
1278       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1279     } else { /* sparse B */
1280       PetscCall(MatSeqAIJGetArray(Bt,&aa));
1281       PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1282       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1283       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1284       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1285     }
1286     /* handle condensation step of Schur complement (if any) */
1287     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1288       second_solve = PETSC_TRUE;
1289       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1290     }
1291     /* solve phase */
1292     /*-------------*/
1293     mumps->id.job = JOB_SOLVE;
1294     PetscMUMPS_c(mumps);
1295     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1296 
1297     /* handle expansion step of Schur complement (if any) */
1298     if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1299     if (!denseB) { /* sparse B */
1300       PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
1301       PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1302       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1303     }
1304     PetscCall(MatDenseRestoreArray(X,&array));
1305     PetscFunctionReturn(0);
1306   }
1307 
1308   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1309   PetscCheck(mumps->petsc_size <= 1 || !mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1310 
1311   /* create msol_loc to hold mumps local solution */
1312   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1313   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1314 
1315   lsol_loc  = mumps->id.lsol_loc;
1316   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1317   PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
1318   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1319   mumps->id.isol_loc = isol_loc;
1320 
1321   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));
1322 
1323   if (denseB) {
1324     if (mumps->ICNTL20 == 10) {
1325       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1326       PetscCall(MatDenseGetArrayRead(B,&rbray));
1327       PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
1328       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1329       PetscCall(MatGetLocalSize(B,&m,NULL));
1330       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
1331     } else {
1332       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1333       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented with MPI_Gather, resulting in
1334         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1335         0, re-arrange B into the desired order, which is a local operation.
1336       */
1337 
1338       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1339       /* wrap dense rhs matrix B into a vector v_mpi */
1340       PetscCall(MatGetLocalSize(B,&m,NULL));
1341       PetscCall(MatDenseGetArray(B,&bray));
1342       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1343       PetscCall(MatDenseRestoreArray(B,&bray));
1344 
1345       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1346       if (!mumps->myid) {
1347         PetscInt *idx;
1348         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1349         PetscCall(PetscMalloc1(nrhs*M,&idx));
1350         PetscCall(MatGetOwnershipRanges(B,&rstart));
1351         k = 0;
1352         for (proc=0; proc<mumps->petsc_size; proc++) {
1353           for (j=0; j<nrhs; j++) {
1354             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1355           }
1356         }
1357 
1358         PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
1359         PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
1360         PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
1361       } else {
1362         PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
1363         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
1364         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
1365       }
1366       PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
1367       PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1368       PetscCall(ISDestroy(&is_to));
1369       PetscCall(ISDestroy(&is_from));
1370       PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1371 
1372       if (!mumps->myid) { /* define rhs on the host */
1373         PetscCall(VecGetArray(b_seq,&bray));
1374         mumps->id.rhs = (MumpsScalar*)bray;
1375         PetscCall(VecRestoreArray(b_seq,&bray));
1376       }
1377     }
1378   } else { /* sparse B */
1379     b = (Mat_MPIAIJ*)Bt->data;
1380 
1381     /* wrap dense X into a vector v_mpi */
1382     PetscCall(MatGetLocalSize(X,&m,NULL));
1383     PetscCall(MatDenseGetArray(X,&bray));
1384     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1385     PetscCall(MatDenseRestoreArray(X,&bray));
1386 
1387     if (!mumps->myid) {
1388       PetscCall(MatSeqAIJGetArray(b->A,&aa));
1389       PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1390       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1391       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1392       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1393     } else {
1394       mumps->id.irhs_ptr    = NULL;
1395       mumps->id.irhs_sparse = NULL;
1396       mumps->id.nz_rhs      = 0;
1397       mumps->id.rhs_sparse  = NULL;
1398     }
1399   }
1400 
1401   /* solve phase */
1402   /*-------------*/
1403   mumps->id.job = JOB_SOLVE;
1404   PetscMUMPS_c(mumps);
1405   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1406 
1407   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1408   PetscCall(MatDenseGetArray(X,&array));
1409   PetscCall(VecPlaceArray(v_mpi,array));
1410 
1411   /* create scatter scat_sol */
1412   PetscCall(MatGetOwnershipRanges(X,&rstart));
1413   /* iidx: index for scatter mumps solution to petsc X */
1414 
1415   PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
1416   PetscCall(PetscMalloc1(nlsol_loc,&idxx));
1417   for (i=0; i<lsol_loc; i++) {
1418     isol_loc[i] -= 1; /* change Fortran style to C style; sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th rhs */
1419 
1420     for (proc=0; proc<mumps->petsc_size; proc++) {
1421       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1422         myrstart = rstart[proc];
1423         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1424         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1425         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1426         break;
1427       }
1428     }
1429 
1430     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1431   }
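  /* Worked example (illustrative), continuing the layout above (rows {0,1} on rank 0, rows {2,3} on
     rank 1, nrhs=2): the solution entry for global row r owned by rank p and rhs j goes to v_mpi index
     rstart[p]*nrhs + (r - rstart[p]) + j*m. For r=2, p=1, j=1 this gives 2*2 + 0 + 1*2 = 6, the slot
     holding X(2,1) in rank 1's column-major block. */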
1432   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
1433   PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
1434   PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1435   PetscCall(ISDestroy(&is_from));
1436   PetscCall(ISDestroy(&is_to));
1437   PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1438   PetscCall(MatDenseRestoreArray(X,&array));
1439 
1440   /* free spaces */
1441   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1442   mumps->id.isol_loc = isol_loc_save;
1443 
1444   PetscCall(PetscFree2(sol_loc,isol_loc));
1445   PetscCall(PetscFree(idxx));
1446   PetscCall(VecDestroy(&msol_loc));
1447   PetscCall(VecDestroy(&v_mpi));
1448   if (!denseB) {
1449     if (!mumps->myid) {
1450       b = (Mat_MPIAIJ*)Bt->data;
1451       PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
1452       PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1453       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1454     }
1455   } else {
1456     if (mumps->ICNTL20 == 0) {
1457       PetscCall(VecDestroy(&b_seq));
1458       PetscCall(VecScatterDestroy(&scat_rhs));
1459     }
1460   }
1461   PetscCall(VecScatterDestroy(&scat_sol));
1462   PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1467 {
1468   PetscBool      flg;
1469   Mat            B;
1470 
1471   PetscFunctionBegin;
1472   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
1473   PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");
1474 
1475   /* Create B=Bt^T that uses Bt's data structure */
1476   PetscCall(MatCreateTranspose(Bt,&B));
1477 
1478   PetscCall(MatMatSolve_MUMPS(A,B,X));
1479   PetscCall(MatDestroy(&B));
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #if !defined(PETSC_USE_COMPLEX)
1484 /*
1485   input:
1486    F:        numeric factor
1487   output:
1488    nneg:     total number of negative pivots
1489    nzero:    total number of zero pivots
1490    npos:     (global dimension of F) - nneg - nzero
1491 */
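/* A hedged usage sketch: factor a symmetric matrix through this interface with the options
     -pc_factor_mat_solver_type mumps -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1
   obtain the factored matrix F (e.g. via PCFactorGetMatrix), then

     PetscInt nneg,nzero,npos;
     PetscCall(MatGetInertia(F,&nneg,&nzero,&npos));

   returns the counts of negative, zero and positive pivots. */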
1492 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1493 {
1494   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1495   PetscMPIInt    size;
1496 
1497   PetscFunctionBegin;
1498   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
1499   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), and ScaLAPACK cannot compute the inertia of the dense root matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1500   PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.ICNTL(13));
1501 
1502   if (nneg) *nneg = mumps->id.INFOG(12);
1503   if (nzero || npos) {
1504     PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1505     if (nzero) *nzero = mumps->id.INFOG(28);
1506     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1507   }
1508   PetscFunctionReturn(0);
1509 }
1510 #endif
1511 
1512 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1513 {
1514   PetscInt       i,nreqs;
1515   PetscMUMPSInt  *irn,*jcn;
1516   PetscMPIInt    count;
1517   PetscInt64     totnnz,remain;
1518   const PetscInt osize=mumps->omp_comm_size;
1519   PetscScalar    *val;
1520 
1521   PetscFunctionBegin;
1522   if (osize > 1) {
1523     if (reuse == MAT_INITIAL_MATRIX) {
1524       /* master first gathers counts of nonzeros to receive */
1525       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
1526       PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));
1527 
1528       /* Then each computes number of send/recvs */
1529       if (mumps->is_omp_master) {
1530         /* Start from 1 since self communication is not done via MPI */
1531         nreqs = 0;
1532         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1533       } else {
1534         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1535       }
1536       PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1537 
1538       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1539          MPI_Gatherv would suffice if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1540          might even be a prime number > 2^31-1, we have to slice the messages. Since omp_comm_size
1541          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1542        */
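      /* Numeric illustration: with PETSC_MPI_INT_MAX = 2^31-1 ~ 2.1e9, a rank contributing
         nnz = 5e9 entries needs ceil(5e9/(2^31-1)) = 3 slices, i.e. 3 sends per array
         (irn, jcn, val); the request counts (nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX computed
         above account for exactly this. */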
1543       nreqs = 0; /* counter for actual send/recvs */
1544       if (mumps->is_omp_master) {
1545         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1546         PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn));
1547         PetscCall(PetscMalloc1(totnnz,&val));
1548 
1549         /* Self communication */
1550         PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz));
1551         PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz));
1552         PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz));
1553 
1554         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1555         PetscCall(PetscFree2(mumps->irn,mumps->jcn));
1556         PetscCall(PetscFree(mumps->val_alloc));
1557         mumps->nnz = totnnz;
1558         mumps->irn = irn;
1559         mumps->jcn = jcn;
1560         mumps->val = mumps->val_alloc = val;
1561 
1562         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1563         jcn += mumps->recvcount[0];
1564         val += mumps->recvcount[0];
1565 
1566         /* Remote communication */
1567         for (i=1; i<osize; i++) {
1568           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1569           remain = mumps->recvcount[i] - count;
1570           while (count>0) {
1571             PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1572             PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1573             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1574             irn    += count;
1575             jcn    += count;
1576             val    += count;
1577             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1578             remain -= count;
1579           }
1580         }
1581       } else {
1582         irn    = mumps->irn;
1583         jcn    = mumps->jcn;
1584         val    = mumps->val;
1585         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1586         remain = mumps->nnz - count;
1587         while (count>0) {
1588           PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1589           PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1590           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1591           irn    += count;
1592           jcn    += count;
1593           val    += count;
1594           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1595           remain -= count;
1596         }
1597       }
1598     } else {
1599       nreqs = 0;
1600       if (mumps->is_omp_master) {
1601         val = mumps->val + mumps->recvcount[0];
1602         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1603           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1604           remain = mumps->recvcount[i] - count;
1605           while (count>0) {
1606             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1607             val    += count;
1608             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1609             remain -= count;
1610           }
1611         }
1612       } else {
1613         val    = mumps->val;
1614         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1615         remain = mumps->nnz - count;
1616         while (count>0) {
1617           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1618           val    += count;
1619           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1620           remain -= count;
1621         }
1622       }
1623     }
1624     PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE));
1625     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag: MPI's non-overtaking rule matches same-(source,tag) messages in posting order */
1626   }
1627   PetscFunctionReturn(0);
1628 }
1629 
1630 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1631 {
1632   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1633   PetscBool      isMPIAIJ;
1634 
1635   PetscFunctionBegin;
1636   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* except the empty-matrix case: INFOG(1)=-16 with INFO(2)=0 */
1637     if (mumps->id.INFOG(1) == -6) {
1638       PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1639     }
1640     PetscCall(PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1641     PetscFunctionReturn(0);
1642   }
1643 
1644   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1645   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1646 
1647   /* numerical factorization phase */
1648   /*-------------------------------*/
1649   mumps->id.job = JOB_FACTNUMERIC;
1650   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1651     if (!mumps->myid) {
1652       mumps->id.a = (MumpsScalar*)mumps->val;
1653     }
1654   } else {
1655     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1656   }
1657   PetscMUMPS_c(mumps);
1658   if (mumps->id.INFOG(1) < 0) {
1659     if (A->erroriffailure) {
1660       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1661     } else {
1662       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1663         PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1664         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1665       } else if (mumps->id.INFOG(1) == -13) {
1666         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1667         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1668       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1669         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1670         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1671       } else {
1672         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1673         F->factorerrortype = MAT_FACTOR_OTHER;
1674       }
1675     }
1676   }
1677   PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d",mumps->id.ICNTL(16));
1678 
1679   F->assembled    = PETSC_TRUE;
1680 
1681   if (F->schur) { /* reset Schur status to unfactored */
1682 #if defined(PETSC_HAVE_CUDA)
1683     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1684 #endif
1685     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1686       mumps->id.ICNTL(19) = 2;
1687       PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1688     }
1689     PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1690   }
1691 
1692   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1693   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1694 
1695   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1696   if (mumps->petsc_size > 1) {
1697     PetscInt    lsol_loc;
1698     PetscScalar *sol_loc;
1699 
1700     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1701 
1702     /* distributed solution; Create x_seq=sol_loc for repeated use */
1703     if (mumps->x_seq) {
1704       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1705       PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
1706       PetscCall(VecDestroy(&mumps->x_seq));
1707     }
1708     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1709     PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc));
1710     mumps->id.lsol_loc = lsol_loc;
1711     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1712     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq));
1713   }
1714   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1715   PetscFunctionReturn(0);
1716 }
1717 
1718 /* Sets MUMPS options from the options database */
1719 PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A)
1720 {
1721   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1722   PetscMUMPSInt  icntl=0;
1723   PetscInt       info[80],i,ninfo=80,rbs,cbs;
1724   PetscBool      flg=PETSC_FALSE;
1725 
1726   PetscFunctionBegin;
1727   PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"MUMPS Options","Mat");
1728   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg));
1729   if (flg) mumps->id.ICNTL(1) = icntl;
1730   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg));
1731   if (flg) mumps->id.ICNTL(2) = icntl;
1732   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg));
1733   if (flg) mumps->id.ICNTL(3) = icntl;
1734 
1735   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg));
1736   if (flg) mumps->id.ICNTL(4) = icntl;
1737   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1738 
1739   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg));
1740   if (flg) mumps->id.ICNTL(6) = icntl;
1741 
1742   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg));
1743   if (flg) {
1744     PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1745     mumps->id.ICNTL(7) = icntl;
1746   }
1747 
1748   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL));
1749   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1750   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL));
1751   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL));
1752   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL));
1753   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL));
1754   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL));
1755   PetscCall(MatGetBlockSizes(A,&rbs,&cbs));
1756   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1757   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg));
1758   if (flg) {
1759     PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled");
1760     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1761   }
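  /* Illustration: for a matrix with 3 dofs per grid point set up via MatSetBlockSize(A,3),
     rbs == cbs == 3, so ICNTL(15) defaults to -3 above, asking MUMPS to compress the input
     matrix by blocks of 3 rows/columns. */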
1762   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL));
1763   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1764     PetscCall(MatDestroy(&F->schur));
1765     PetscCall(MatMumpsResetSchur_Private(mumps));
1766   }
1767 
1768   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1769      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1770      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1771      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1772      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1773      In short, we could not use distributed RHS with MPICH until v4.0b1.
1774    */
1775 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1776   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1777 #else
1778   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1779 #endif
1780   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg));
1781   PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1782 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1783   PetscCheck(!flg || mumps->ICNTL20 != 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1784 #endif
1785   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1786 
1787   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL));
1788   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can be allocated per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL));
1789   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
1790   if (mumps->id.ICNTL(24)) {
1791     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1792   }
1793 
1794   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL));
1795   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase when a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL));
1796   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL));
1797   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL));
1798   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL));
1799   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1800   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL));
1801   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1802   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL));
1803   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL));
1804   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL));
1805   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL));
1806 
1807   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL));
1808   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL));
1809   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL));
1810   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL));
1811   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL));
1812   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL));
1813 
1814   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1815 
1816   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL));
1817   if (ninfo) {
1818     PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must be <= 80",ninfo);
1819     PetscCall(PetscMalloc1(ninfo,&mumps->info));
1820     mumps->ninfo = ninfo;
1821     for (i=0; i<ninfo; i++) {
1822       PetscCheck(info[i] >= 1 && info[i] <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must be between 1 and 80",info[i]);
1823       mumps->info[i] = info[i];
1824     }
1825   }
1826 
1827   PetscOptionsEnd();
1828   PetscFunctionReturn(0);
1829 }
1830 
1831 PetscErrorCode MatSetFromOptions_MUMPS_OpenMP(Mat F,Mat A)
1832 {
1833   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1834   PetscInt       nthreads=0;
1835 
1836   PetscFunctionBegin;
1837   mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1838   PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size));
1839   PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */
1840 
1841   PetscCall(PetscOptionsHasName(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support));
1842   if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when the user did not supply one */
1843   PetscCall(PetscOptionsGetInt(NULL,((PetscObject)F)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL));
1844   if (mumps->use_petsc_omp_support) {
1845 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1846     PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl));
1847     PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master));
1848 #else
1849     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
1850 #endif
1851   } else {
1852     mumps->omp_comm      = PETSC_COMM_SELF;
1853     mumps->mumps_comm    = mumps->petsc_comm;
1854     mumps->is_omp_master = PETSC_TRUE;
1855   }
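  /* Usage sketch (hedged): with an OpenMP-enabled PETSc build, running e.g.
       mpirun -n 8 ./app -mat_mumps_use_omp_threads 4
     is expected to group the 8 MPI ranks into 2 omp communicators of size 4; only the 2 masters
     call MUMPS, each driving 4 OpenMP threads. */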
1856   PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size));
1857   mumps->reqs = NULL;
1858   mumps->tag  = 0;
1859 
1860   /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1861   if (mumps->mumps_comm != MPI_COMM_NULL) {
1862     PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
1863   }
1864 
1865   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1866   mumps->id.job = JOB_INIT;
1867   mumps->id.par = 1;  /* host participates in factorization and solve */
1868   mumps->id.sym = mumps->sym;
1869 
1870   PetscMUMPS_c(mumps);
1871   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1872 
1873   /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1874      For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1875    */
1876   PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm));
1877   PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1878 
1879   mumps->scat_rhs = NULL;
1880   mumps->scat_sol = NULL;
1881 
1882   /* set PETSc-MUMPS default options - override MUMPS default */
1883   mumps->id.ICNTL(3) = 0;
1884   mumps->id.ICNTL(4) = 0;
1885   if (mumps->petsc_size == 1) {
1886     mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1887     mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1888   } else {
1889     mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1890     mumps->id.ICNTL(21) = 1;   /* distributed solution */
1891   }
1892 
1893   /* schur */
1894   mumps->id.size_schur    = 0;
1895   mumps->id.listvar_schur = NULL;
1896   mumps->id.schur         = NULL;
1897   mumps->sizeredrhs       = 0;
1898   mumps->schur_sol        = NULL;
1899   mumps->schur_sizesol    = 0;
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1904 {
1905   PetscFunctionBegin;
1906   if (mumps->id.INFOG(1) < 0) {
1907     if (A->erroriffailure) {
1908       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1909     } else {
1910       if (mumps->id.INFOG(1) == -6) {
1911         PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1912         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1913       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1914         PetscCall(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1915         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1916       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 with INFO(2)=0 indicates an empty matrix */
1917         PetscCall(PetscInfo(F,"Empty matrix\n"));
1918       } else {
1919         PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1920         F->factorerrortype = MAT_FACTOR_OTHER;
1921       }
1922     }
1923   }
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1928 {
1929   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1930   Vec            b;
1931   const PetscInt M = A->rmap->N;
1932 
1933   PetscFunctionBegin;
1934   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1935     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1936     PetscFunctionReturn(0);
1937   }
1938 
1939   /* Set MUMPS options from the options database */
1940   PetscCall(MatSetFromOptions_MUMPS(F,A));
1941 
1942   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1943   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
1944 
1945   /* analysis phase */
1946   /*----------------*/
1947   mumps->id.job = JOB_FACTSYMBOLIC;
1948   mumps->id.n   = M;
1949   switch (mumps->id.ICNTL(18)) {
1950   case 0:  /* centralized assembled matrix input */
1951     if (!mumps->myid) {
1952       mumps->id.nnz = mumps->nnz;
1953       mumps->id.irn = mumps->irn;
1954       mumps->id.jcn = mumps->jcn;
1955       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1956       if (r) {
1957         mumps->id.ICNTL(7) = 1;
1958         if (!mumps->myid) {
1959           const PetscInt *idx;
1960           PetscInt       i;
1961 
1962           PetscCall(PetscMalloc1(M,&mumps->id.perm_in));
1963           PetscCall(ISGetIndices(r,&idx));
1964           for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
1965           PetscCall(ISRestoreIndices(r,&idx));
1966         }
1967       }
1968     }
1969     break;
1970   case 3:  /* distributed assembled matrix input (size>1) */
1971     mumps->id.nnz_loc = mumps->nnz;
1972     mumps->id.irn_loc = mumps->irn;
1973     mumps->id.jcn_loc = mumps->jcn;
1974     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1975     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1976       PetscCall(MatCreateVecs(A,NULL,&b));
1977       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
1978       PetscCall(VecDestroy(&b));
1979     }
1980     break;
1981   }
1982   PetscMUMPS_c(mumps);
1983   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
1984 
1985   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1986   F->ops->solve           = MatSolve_MUMPS;
1987   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
1988   F->ops->matsolve        = MatMatSolve_MUMPS;
1989   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1990 
1991   mumps->matstruc = SAME_NONZERO_PATTERN;
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 /* Note the Petsc r and c permutations are ignored */
1996 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1997 {
1998   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1999   Vec            b;
2000   const PetscInt M = A->rmap->N;
2001 
2002   PetscFunctionBegin;
2003   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2004     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2005     PetscFunctionReturn(0);
2006   }
2007 
2008   /* Set MUMPS options from the options database */
2009   PetscCall(MatSetFromOptions_MUMPS(F,A));
2010 
2011   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2012   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2013 
2014   /* analysis phase */
2015   /*----------------*/
2016   mumps->id.job = JOB_FACTSYMBOLIC;
2017   mumps->id.n   = M;
2018   switch (mumps->id.ICNTL(18)) {
2019   case 0:  /* centralized assembled matrix input */
2020     if (!mumps->myid) {
2021       mumps->id.nnz = mumps->nnz;
2022       mumps->id.irn = mumps->irn;
2023       mumps->id.jcn = mumps->jcn;
2024       if (mumps->id.ICNTL(6)>1) {
2025         mumps->id.a = (MumpsScalar*)mumps->val;
2026       }
2027     }
2028     break;
2029   case 3:  /* distributed assembled matrix input (size>1) */
2030     mumps->id.nnz_loc = mumps->nnz;
2031     mumps->id.irn_loc = mumps->irn;
2032     mumps->id.jcn_loc = mumps->jcn;
2033     if (mumps->id.ICNTL(6)>1) {
2034       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2035     }
2036     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2037       PetscCall(MatCreateVecs(A,NULL,&b));
2038       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2039       PetscCall(VecDestroy(&b));
2040     }
2041     break;
2042   }
2043   PetscMUMPS_c(mumps);
2044   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2045 
2046   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2047   F->ops->solve           = MatSolve_MUMPS;
2048   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2049 
2050   mumps->matstruc = SAME_NONZERO_PATTERN;
2051   PetscFunctionReturn(0);
2052 }
2053 
2054 /* Note the Petsc r permutation and factor info are ignored */
2055 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2056 {
2057   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2058   Vec            b;
2059   const PetscInt M = A->rmap->N;
2060 
2061   PetscFunctionBegin;
2062   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2063     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2064     PetscFunctionReturn(0);
2065   }
2066 
2067   /* Set MUMPS options from the options database */
2068   PetscCall(MatSetFromOptions_MUMPS(F,A));
2069 
2070   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2071   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2072 
2073   /* analysis phase */
2074   /*----------------*/
2075   mumps->id.job = JOB_FACTSYMBOLIC;
2076   mumps->id.n   = M;
2077   switch (mumps->id.ICNTL(18)) {
2078   case 0:  /* centralized assembled matrix input */
2079     if (!mumps->myid) {
2080       mumps->id.nnz = mumps->nnz;
2081       mumps->id.irn = mumps->irn;
2082       mumps->id.jcn = mumps->jcn;
2083       if (mumps->id.ICNTL(6)>1) {
2084         mumps->id.a = (MumpsScalar*)mumps->val;
2085       }
2086     }
2087     break;
2088   case 3:  /* distributed assembled matrix input (size>1) */
2089     mumps->id.nnz_loc = mumps->nnz;
2090     mumps->id.irn_loc = mumps->irn;
2091     mumps->id.jcn_loc = mumps->jcn;
2092     if (mumps->id.ICNTL(6)>1) {
2093       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2094     }
2095     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2096       PetscCall(MatCreateVecs(A,NULL,&b));
2097       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2098       PetscCall(VecDestroy(&b));
2099     }
2100     break;
2101   }
2102   PetscMUMPS_c(mumps);
2103   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2104 
2105   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2106   F->ops->solve                 = MatSolve_MUMPS;
2107   F->ops->solvetranspose        = MatSolve_MUMPS; /* the factor is symmetric, so a transpose solve is the same as a regular solve */
2108   F->ops->matsolve              = MatMatSolve_MUMPS;
2109   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2110 #if defined(PETSC_USE_COMPLEX)
2111   F->ops->getinertia = NULL;
2112 #else
2113   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2114 #endif
2115 
2116   mumps->matstruc = SAME_NONZERO_PATTERN;
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2121 {
2122   PetscBool         iascii;
2123   PetscViewerFormat format;
2124   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2125 
2126   PetscFunctionBegin;
2127   /* check if matrix is mumps type */
2128   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2129 
2130   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
2131   if (iascii) {
2132     PetscCall(PetscViewerGetFormat(viewer,&format));
2133     if (format == PETSC_VIEWER_ASCII_INFO) {
2134       PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n"));
2135       PetscCall(PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym));
2136       PetscCall(PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par));
2137       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1)));
2138       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2)));
2139       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3)));
2140       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4)));
2141       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5)));
2142       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6)));
2143       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering): %d\n",mumps->id.ICNTL(7)));
2144       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8)));
2145       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10)));
2146       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11)));
2147       if (mumps->id.ICNTL(11)>0) {
2148         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4)));
2149         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5)));
2150         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6)));
2151         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8)));
2152         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9)));
2153         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11)));
2154       }
2155       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12)));
2156       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13)));
2157       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14)));
2158       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (compression of the input matrix):            %d\n",mumps->id.ICNTL(15)));
2159       /* ICNTL(15-17) not used */
2160       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18)));
2161       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19)));
2162       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20)));
2163       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21)));
2164       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22)));
2165       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory that can be allocated locally): %d\n",mumps->id.ICNTL(23)));
2166 
2167       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24)));
2168       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25)));
2169       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26)));
2170       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27)));
2171       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28)));
2172       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29)));
2173 
2174       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30)));
2175       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors discarded in the solve phase):       %d\n",mumps->id.ICNTL(31)));
2176       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33)));
2177       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35)));
2178       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36)));
2179       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38)));
2180 
2181       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1)));
2182       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2)));
2183       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3)));
2184       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4)));
2185       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5)));
2186       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7)));
2187 
2188       /* information local to each processor */
2189       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n"));
2190       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2191       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1)));
2192       PetscCall(PetscViewerFlush(viewer));
2193       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n"));
2194       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2)));
2195       PetscCall(PetscViewerFlush(viewer));
2196       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n"));
2197       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3)));
2198       PetscCall(PetscViewerFlush(viewer));
2199 
2200       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n"));
2201       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(15)));
2202       PetscCall(PetscViewerFlush(viewer));
2203 
2204       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n"));
2205       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16)));
2206       PetscCall(PetscViewerFlush(viewer));
2207 
2208       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n"));
2209       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23)));
2210       PetscCall(PetscViewerFlush(viewer));
2211 
2212       if (mumps->ninfo && mumps->ninfo <= 80) {
2213         PetscInt i;
2214         for (i=0; i<mumps->ninfo; i++) {
2215           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]));
2216           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i])));
2217           PetscCall(PetscViewerFlush(viewer));
2218         }
2219       }
2220       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2221 
2222       if (!mumps->myid) { /* information from the host */
2223         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1)));
2224         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2)));
2225         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3)));
2226         PetscCall(PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34)));
2227 
2228         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3)));
2229         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4)));
2230         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5)));
2231         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6)));
2232         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7)));
2233         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8)));
2234         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9)));
2235         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n",mumps->id.INFOG(10)));
2236         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11)));
2237         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12)));
2238         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13)));
2239         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d\n",mumps->id.INFOG(14)));
2240         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15)));
2241         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16)));
2242         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17)));
2243         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18)));
2244         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19)));
2245         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20)));
2246         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21)));
2247         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22)));
2248         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23)));
2249         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24)));
2250         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25)));
2251         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28)));
2252         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29)));
2253         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31)));
2254         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32)));
2255         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33)));
2256         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34)));
2257         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35)));
2258         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36)));
2259         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37)));
2260         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38)));
2261         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39)));
2262       }
2263     }
2264   }
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2269 {
2270   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2271 
2272   PetscFunctionBegin;
2273   info->block_size        = 1.0;
2274   info->nz_allocated      = mumps->id.INFOG(20);
2275   info->nz_used           = mumps->id.INFOG(20);
2276   info->nz_unneeded       = 0.0;
2277   info->assemblies        = 0.0;
2278   info->mallocs           = 0.0;
2279   info->memory            = 0.0;
2280   info->fill_ratio_given  = 0;
2281   info->fill_ratio_needed = 0;
2282   info->factor_mallocs    = 0;
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 /* -------------------------------------------------------------------------------------------*/
2287 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2288 {
2289   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2290   const PetscScalar *arr;
2291   const PetscInt    *idxs;
2292   PetscInt          size,i;
2293 
2294   PetscFunctionBegin;
2295   PetscCall(ISGetLocalSize(is,&size));
2296   if (mumps->petsc_size > 1) {
2297     PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2298 
2299     ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2300     PetscCallMPI(MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm));
2301     PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
2302   }
2303 
2304   /* Schur complement matrix */
2305   PetscCall(MatDestroy(&F->schur));
2306   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur));
2307   PetscCall(MatDenseGetArrayRead(F->schur,&arr));
2308   mumps->id.schur      = (MumpsScalar*)arr;
2309   mumps->id.size_schur = size;
2310   mumps->id.schur_lld  = size;
2311   PetscCall(MatDenseRestoreArrayRead(F->schur,&arr));
2312   if (mumps->sym == 1) {
2313     PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE));
2314   }
2315 
2316   /* MUMPS expects Fortran style indices */
2317   PetscCall(PetscFree(mumps->id.listvar_schur));
2318   PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur));
2319   PetscCall(ISGetIndices(is,&idxs));
2320   for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i])));
2321   PetscCall(ISRestoreIndices(is,&idxs));
2322   if (mumps->petsc_size > 1) {
2323     mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2324   } else {
2325     if (F->factortype == MAT_FACTOR_LU) {
2326       mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2327     } else {
2328       mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2329     }
2330   }
2331     /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2332   mumps->id.ICNTL(26) = -1;
2333   PetscFunctionReturn(0);
2334 }
2335 
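/* A usage sketch for the Schur complement interface; the names A, F, is, rperm, cperm, and info below are
   illustrative and not taken from this file. MatFactorSetSchurIS() must be called before the symbolic factorization:

     PetscCall(MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F));
     PetscCall(MatFactorSetSchurIS(F,is));                    // select the rows/columns of the Schur complement
     PetscCall(MatGetOrdering(A,MATORDERINGEXTERNAL,&rperm,&cperm));
     PetscCall(MatLUFactorSymbolic(F,A,rperm,cperm,&info));   // info is a MatFactorInfo
     PetscCall(MatLUFactorNumeric(F,A,&info));
     // the Schur complement can then be retrieved from F, e.g., with MatFactorCreateSchurComplement()
*/
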
2336 /* -------------------------------------------------------------------------------------------*/
2337 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2338 {
2339   Mat            St;
2340   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2341   PetscScalar    *array;
2342 #if defined(PETSC_USE_COMPLEX)
2343   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2344 #endif
2345 
2346   PetscFunctionBegin;
2347   PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2348   PetscCall(MatCreate(PETSC_COMM_SELF,&St));
2349   PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur));
2350   PetscCall(MatSetType(St,MATDENSE));
2351   PetscCall(MatSetUp(St));
2352   PetscCall(MatDenseGetArray(St,&array));
2353   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2354     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2355       PetscInt i,j,N=mumps->id.size_schur;
2356       for (i=0;i<N;i++) {
2357         for (j=0;j<N;j++) {
2358 #if !defined(PETSC_USE_COMPLEX)
2359           PetscScalar val = mumps->id.schur[i*N+j];
2360 #else
2361           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2362 #endif
2363           array[j*N+i] = val;
2364         }
2365       }
2366     } else { /* stored by columns */
2367       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2368     }
2369   } else { /* either full or lower-triangular (not packed) */
2370     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2371       PetscInt i,j,N=mumps->id.size_schur;
2372       for (i=0;i<N;i++) {
2373         for (j=i;j<N;j++) {
2374 #if !defined(PETSC_USE_COMPLEX)
2375           PetscScalar val = mumps->id.schur[i*N+j];
2376 #else
2377           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2378 #endif
2379           array[i*N+j] = val;
2380           array[j*N+i] = val;
2381         }
2382       }
2383     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2384       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2385     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2386       PetscInt i,j,N=mumps->id.size_schur;
2387       for (i=0;i<N;i++) {
2388         for (j=0;j<i+1;j++) {
2389 #if !defined(PETSC_USE_COMPLEX)
2390           PetscScalar val = mumps->id.schur[i*N+j];
2391 #else
2392           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2393 #endif
2394           array[i*N+j] = val;
2395           array[j*N+i] = val;
2396         }
2397       }
2398     }
2399   }
2400   PetscCall(MatDenseRestoreArray(St,&array));
2401   *S   = St;
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 /* -------------------------------------------------------------------------------------------*/
2406 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2407 {
2408   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2409 
2410   PetscFunctionBegin;
2411   PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl)));
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2416 {
2417   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2418 
2419   PetscFunctionBegin;
2420   *ival = mumps->id.ICNTL(icntl);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 /*@
2425   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2426 
2427    Logically Collective on Mat
2428 
2429    Input Parameters:
2430 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2431 .  icntl - index of MUMPS parameter array ICNTL()
2432 -  ival - value of MUMPS ICNTL(icntl)
2433 
2434   Options Database:
2435 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2436 
2437    Level: beginner
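   Example usage, as a minimal sketch (assumes F was obtained with MatGetFactor() using MATSOLVERMUMPS; the values chosen here are illustrative):
$          MatMumpsSetIcntl(F,14,30);  // raise the percentage increase in the estimated working space to 30
$          MatMumpsSetIcntl(F,24,1);   // turn on detection of null pivot rows
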
2438 
2439    References:
2440 .  * - MUMPS Users' Guide
2441 
2442 .seealso: `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2443 @*/
2444 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2445 {
2446   PetscFunctionBegin;
2447   PetscValidType(F,1);
2448   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2449   PetscValidLogicalCollectiveInt(F,icntl,2);
2450   PetscValidLogicalCollectiveInt(F,ival,3);
2451   PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 /*@
2456   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2457 
2458    Logically Collective on Mat
2459 
2460    Input Parameters:
2461 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2462 -  icntl - index of MUMPS parameter array ICNTL()
2463 
2464   Output Parameter:
2465 .  ival - value of MUMPS ICNTL(icntl)
2466 
2467    Level: beginner
2468 
2469    References:
2470 .  * - MUMPS Users' Guide
2471 
2472 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2473 @*/
2474 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2475 {
2476   PetscFunctionBegin;
2477   PetscValidType(F,1);
2478   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2479   PetscValidLogicalCollectiveInt(F,icntl,2);
2480   PetscValidIntPointer(ival,3);
2481   PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 /* -------------------------------------------------------------------------------------------*/
2486 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2487 {
2488   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2489 
2490   PetscFunctionBegin;
2491   mumps->id.CNTL(icntl) = val;
2492   PetscFunctionReturn(0);
2493 }
2494 
2495 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2496 {
2497   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2498 
2499   PetscFunctionBegin;
2500   *val = mumps->id.CNTL(icntl);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 /*@
2505   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2506 
2507    Logically Collective on Mat
2508 
2509    Input Parameters:
2510 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2511 .  icntl - index of MUMPS parameter array CNTL()
2512 -  val - value of MUMPS CNTL(icntl)
2513 
2514   Options Database:
2515 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2516 
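   Example usage, as a minimal sketch (assumes F was obtained with MatGetFactor() using MATSOLVERMUMPS; the value is illustrative):
$          MatMumpsSetCntl(F,1,0.01);  // use a relative pivoting threshold of 0.01
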
2517    Level: beginner
2518 
2519    References:
2520 .  * - MUMPS Users' Guide
2521 
2522 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2523 @*/
2524 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2525 {
2526   PetscFunctionBegin;
2527   PetscValidType(F,1);
2528   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2529   PetscValidLogicalCollectiveInt(F,icntl,2);
2530   PetscValidLogicalCollectiveReal(F,val,3);
2531   PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2532   PetscFunctionReturn(0);
2533 }
2534 
2535 /*@
2536   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2537 
2538    Logically Collective on Mat
2539 
2540    Input Parameters:
2541 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2542 -  icntl - index of MUMPS parameter array CNTL()
2543 
2544   Output Parameter:
2545 .  val - value of MUMPS CNTL(icntl)
2546 
2547    Level: beginner
2548 
2549    References:
2550 .  * - MUMPS Users' Guide
2551 
2552 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2553 @*/
2554 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2555 {
2556   PetscFunctionBegin;
2557   PetscValidType(F,1);
2558   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2559   PetscValidLogicalCollectiveInt(F,icntl,2);
2560   PetscValidRealPointer(val,3);
2561   PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2566 {
2567   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2568 
2569   PetscFunctionBegin;
2570   *info = mumps->id.INFO(icntl);
2571   PetscFunctionReturn(0);
2572 }
2573 
2574 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2575 {
2576   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2577 
2578   PetscFunctionBegin;
2579   *infog = mumps->id.INFOG(icntl);
2580   PetscFunctionReturn(0);
2581 }
2582 
2583 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2584 {
2585   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2586 
2587   PetscFunctionBegin;
2588   *rinfo = mumps->id.RINFO(icntl);
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2593 {
2594   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2595 
2596   PetscFunctionBegin;
2597   *rinfog = mumps->id.RINFOG(icntl);
2598   PetscFunctionReturn(0);
2599 }
2600 
2601 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2602 {
2603   Mat            Bt = NULL,Btseq = NULL;
2604   PetscBool      flg;
2605   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2606   PetscScalar    *aa;
2607   PetscInt       spnr,*ia,*ja,M,nrhs;
2608 
2609   PetscFunctionBegin;
2610   PetscValidPointer(spRHS,2);
2611   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg));
2612   if (flg) {
2613     PetscCall(MatTransposeGetMat(spRHS,&Bt));
2614   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2615 
2616   PetscCall(MatMumpsSetIcntl(F,30,1));
2617 
2618   if (mumps->petsc_size > 1) {
2619     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2620     Btseq = b->A;
2621   } else {
2622     Btseq = Bt;
2623   }
2624 
2625   PetscCall(MatGetSize(spRHS,&M,&nrhs));
2626   mumps->id.nrhs = nrhs;
2627   mumps->id.lrhs = M;
2628   mumps->id.rhs  = NULL;
2629 
2630   if (!mumps->myid) {
2631     PetscCall(MatSeqAIJGetArray(Btseq,&aa));
2632     PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2633     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2634     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
2635     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2636   } else {
2637     mumps->id.irhs_ptr    = NULL;
2638     mumps->id.irhs_sparse = NULL;
2639     mumps->id.nz_rhs      = 0;
2640     mumps->id.rhs_sparse  = NULL;
2641   }
2642   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2643   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2644 
2645   /* solve phase */
2646   /*-------------*/
2647   mumps->id.job = JOB_SOLVE;
2648   PetscMUMPS_c(mumps);
2649   if (mumps->id.INFOG(1) < 0)
2650     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2651 
2652   if (!mumps->myid) {
2653     PetscCall(MatSeqAIJRestoreArray(Btseq,&aa));
2654     PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2655     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2656   }
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 /*@
2661   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2662 
2663    Logically Collective on Mat
2664 
2665    Input Parameters:
2666 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2667 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the requested indices on process 0
2668 
2669   Output Parameter:
2670 . spRHS - requested entries of inverse of A
2671 
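   Example usage, as a minimal sketch (Bt is an assumed sequential AIJ matrix whose sparsity pattern selects the entries of inv(A) to compute):
$          Mat spRHS;
$          MatCreateTranspose(Bt,&spRHS);  // wrap Bt as its transpose without forming the transpose explicitly
$          MatMumpsGetInverse(F,spRHS);
$          MatDestroy(&spRHS);
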
2672    Level: beginner
2673 
2674    References:
2675 .  * - MUMPS Users' Guide
2676 
2677 .seealso: `MatGetFactor()`, `MatCreateTranspose()`
2678 @*/
2679 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2680 {
2681   PetscFunctionBegin;
2682   PetscValidType(F,1);
2683   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2684   PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2689 {
2690   Mat            spRHS;
2691 
2692   PetscFunctionBegin;
2693   PetscCall(MatCreateTranspose(spRHST,&spRHS));
2694   PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS));
2695   PetscCall(MatDestroy(&spRHS));
2696   PetscFunctionReturn(0);
2697 }
2698 
2699 /*@
2700   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2701 
2702    Logically Collective on Mat
2703 
2704    Input Parameters:
2705 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2706 -  spRHST - sequential sparse matrix in MATAIJ format holding the requested indices of A^T on process 0
2707 
2708   Output Parameter:
2709 . spRHST - requested entries of inverse of A^T
2710 
2711    Level: beginner
2712 
2713    References:
2714 .  * - MUMPS Users' Guide
2715 
2716 .seealso: `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()`
2717 @*/
2718 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2719 {
2720   PetscBool      flg;
2721 
2722   PetscFunctionBegin;
2723   PetscValidType(F,1);
2724   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2725   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
2726   PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix");
2727 
2728   PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2729   PetscFunctionReturn(0);
2730 }
2731 
2732 /*@
2733   MatMumpsGetInfo - Get MUMPS parameter INFO()
2734 
2735    Logically Collective on Mat
2736 
2737    Input Parameters:
2738 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2739 -  icntl - index of MUMPS parameter array INFO()
2740 
2741   Output Parameter:
2742 .  ival - value of MUMPS INFO(icntl)
2743 
2744    Level: beginner
2745 
2746    References:
2747 .  * - MUMPS Users' Guide
2748 
2749 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2750 @*/
2751 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2752 {
2753   PetscFunctionBegin;
2754   PetscValidType(F,1);
2755   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2756   PetscValidIntPointer(ival,3);
2757   PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2758   PetscFunctionReturn(0);
2759 }
2760 
2761 /*@
2762   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2763 
2764    Logically Collective on Mat
2765 
2766    Input Parameters:
2767 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2768 -  icntl - index of MUMPS parameter array INFOG()
2769 
2770   Output Parameter:
2771 .  ival - value of MUMPS INFOG(icntl)
2772 
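   Example usage, as a minimal sketch (F is assumed to hold a completed MUMPS factorization):
$          PetscInt infog1,info2;
$          MatMumpsGetInfog(F,1,&infog1);  // INFOG(1) < 0 indicates an error
$          MatMumpsGetInfo(F,2,&info2);    // INFO(2) carries additional error detail
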
2773    Level: beginner
2774 
2775    References:
2776 .  * - MUMPS Users' Guide
2777 
2778 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
2779 @*/
2780 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2781 {
2782   PetscFunctionBegin;
2783   PetscValidType(F,1);
2784   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2785   PetscValidIntPointer(ival,3);
2786   PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 /*@
2791   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2792 
2793    Logically Collective on Mat
2794 
2795    Input Parameters:
2796 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2797 -  icntl - index of MUMPS parameter array RINFO()
2798 
2799   Output Parameter:
2800 .  val - value of MUMPS RINFO(icntl)
2801 
2802    Level: beginner
2803 
2804    References:
2805 .  * - MUMPS Users' Guide
2806 
2807 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()`
2808 @*/
2809 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2810 {
2811   PetscFunctionBegin;
2812   PetscValidType(F,1);
2813   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2814   PetscValidRealPointer(val,3);
2815   PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2816   PetscFunctionReturn(0);
2817 }
2818 
2819 /*@
2820   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2821 
2822    Logically Collective on Mat
2823 
2824    Input Parameters:
2825 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2826 -  icntl - index of MUMPS parameter array RINFOG()
2827 
2828   Output Parameter:
2829 .  val - value of MUMPS RINFOG(icntl)
2830 
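   Example usage, as a sketch of retrieving the determinant (assumes ICNTL(33) was set to 1 before factorization; per the MUMPS Users' Guide the determinant is RINFOG(12) (plus i*RINFOG(13) in the complex case) times 2^INFOG(34)):
$          PetscReal mantissa;
$          PetscInt  exponent;
$          MatMumpsGetRinfog(F,12,&mantissa);
$          MatMumpsGetInfog(F,34,&exponent);
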
2831    Level: beginner
2832 
2833    References:
2834 .  * - MUMPS Users' Guide
2835 
2836 .seealso: `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`
2837 @*/
2838 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2839 {
2840   PetscFunctionBegin;
2841   PetscValidType(F,1);
2842   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2843   PetscValidRealPointer(val,3);
2844   PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2845   PetscFunctionReturn(0);
2846 }
2847 
2848 /*MC
2849   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2850   distributed and sequential matrices via the external package MUMPS.
2851 
2852   Works with MATAIJ and MATSBAIJ matrices; MATBAIJ and sequential MATSELL matrices are supported for LU factorization only
2853 
2854   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2855 
2856   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2857 
2858   Use -pc_type lu or -pc_type cholesky together with -pc_factor_mat_solver_type mumps to use this direct solver
2859 
2860   Options Database Keys:
2861 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2862 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2863 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2864 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2865 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2866 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2867                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2868 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2869 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of iterative refinement steps
2870 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2871 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2872 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2873 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2874 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2875 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2876 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2877 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2878 .  -mat_mumps_icntl_23  - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor
2879 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2880 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2881 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
2882 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2883 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2884 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2885 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2886 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2887 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2888 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2889 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2890 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2891 .  -mat_mumps_cntl_2  - CNTL(2): stopping criterion for iterative refinement
2892 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2893 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2894 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2895 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2896 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2897                                    The default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2898 
2899   Level: beginner
2900 
2901     Notes:
2902     MUMPS Cholesky does not handle (complex) Hermitian matrices (see the MUMPS Users' Guide, http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so requesting a Cholesky factorization of a Hermitian matrix generates an error.
2903 
2904     When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling
2905     `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix.
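
    For example, as a sketch ("myprefix_" is an arbitrary, illustrative prefix):
$          MatSetOptionsPrefixFactor(A,"myprefix_");  // options such as -myprefix_mat_mumps_icntl_14 30 now apply to factors of A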
2906 
2907     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2908 $          KSPGetPC(ksp,&pc);
2909 $          PCFactorGetMatrix(pc,&mat);
2910 $          MatMumpsGetInfo(mat,....);
2911 $          MatMumpsGetInfog(mat,....); etc.
2912            Alternatively, run with -ksp_error_if_not_converged; the program will stop and the MUMPS information will be printed in the error message.
2913 
2914   Using MUMPS with 64-bit integers
2915     MUMPS provides 64-bit integer support in two build modes:
2916       full 64-bit: here MUMPS is built with the C preprocessing flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2917       requires that all dependent libraries (MPI, ScaLAPACK, LAPACK and BLAS) be built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2918 
2919       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2920       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2921       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2922       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2923 
2924     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which works with both --with-64-bit-indices=0 and --with-64-bit-indices=1 builds of PETSc.
2925 
2926   Two modes to run MUMPS/PETSc with OpenMP
2927 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2928 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2929 
2930 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2931 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2932 
2933    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2934    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2935    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2936    libraries, to get good performance you should use multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2937    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2938 
2939    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2940    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2941    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2942    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. PETSc frees the CPUs assigned to slaves (which may have
2943    been set by CPU binding policies in job scripts) and makes them available to the master, so that the OpenMP threads spawned by MUMPS can run on them.
2944    On a multi-socket compute node, MPI rank mapping matters. Continuing the above example, suppose your compute node has two sockets.
2945    If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind
2946    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of the cores on socket 0 and half
2947    of the cores on socket 1, which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the
2948    problem does not arise. Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2949    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2950    examine the mapping result.
2951 
2952    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2953    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2954    calls omp_set_num_threads(m) internally before calling MUMPS.
2955 
2956    References:
2957 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2958 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2959 
2960 .seealso: `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `KSPGetPC()`, `PCFactorGetMatrix()`
2961 
2962 M*/
2963 
2964 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2965 {
2966   PetscFunctionBegin;
2967   *type = MATSOLVERMUMPS;
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 /* MatGetFactor for Seq and MPI AIJ matrices */
2972 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2973 {
2974   Mat            B;
2975   Mat_MUMPS      *mumps;
2976   PetscBool      isSeqAIJ;
2977   PetscMPIInt    size;
2978 
2979   PetscFunctionBegin;
2980  #if defined(PETSC_USE_COMPLEX)
2981   PetscCheck(!A->hermitian || A->symmetric || ftype != MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
2982  #endif
2983   /* Create the factorization matrix */
2984   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ));
2985   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
2986   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2987   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
2988   PetscCall(MatSetUp(B));
2989 
2990   PetscCall(PetscNewLog(B,&mumps));
2991 
2992   B->ops->view    = MatView_MUMPS;
2993   B->ops->getinfo = MatGetInfo_MUMPS;
2994 
2995   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
2996   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
2997   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
2998   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
2999   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3000   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3001   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3002   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3003   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3004   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3005   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3006   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3007   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3008 
3009   if (ftype == MAT_FACTOR_LU) {
3010     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3011     B->factortype            = MAT_FACTOR_LU;
3012     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3013     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3014     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3015     mumps->sym = 0;
3016   } else {
3017     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3018     B->factortype                  = MAT_FACTOR_CHOLESKY;
3019     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3020     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3021     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3022 #if defined(PETSC_USE_COMPLEX)
3023     mumps->sym = 2;
3024 #else
3025     if (A->spd_set && A->spd) mumps->sym = 1;
3026     else                      mumps->sym = 2;
3027 #endif
3028   }
3029 
3030   /* set solvertype */
3031   PetscCall(PetscFree(B->solvertype));
3032   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3033   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3034   if (size == 1) {
3035     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3036     B->canuseordering = PETSC_TRUE;
3037   }
3038   B->ops->destroy = MatDestroy_MUMPS;
3039   B->data         = (void*)mumps;
3040 
3041   PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A));
3042 
3043   *F = B;
3044   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3049 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3050 {
3051   Mat            B;
3052   Mat_MUMPS      *mumps;
3053   PetscBool      isSeqSBAIJ;
3054   PetscMPIInt    size;
3055 
3056   PetscFunctionBegin;
3057  #if defined(PETSC_USE_COMPLEX)
3058   PetscCheck(!A->hermitian || A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3059  #endif
3060   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3061   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3062   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3063   PetscCall(MatSetUp(B));
3064 
3065   PetscCall(PetscNewLog(B,&mumps));
3066   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ));
3067   if (isSeqSBAIJ) {
3068     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3069   } else {
3070     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3071   }
3072 
3073   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3074   B->ops->view                   = MatView_MUMPS;
3075   B->ops->getinfo                = MatGetInfo_MUMPS;
3076 
3077   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3078   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3079   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3080   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3081   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3082   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3083   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3084   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3085   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3086   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3087   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3088   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3089   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3090 
3091   B->factortype = MAT_FACTOR_CHOLESKY;
3092 #if defined(PETSC_USE_COMPLEX)
3093   mumps->sym = 2;
3094 #else
3095   if (A->spd_set && A->spd) mumps->sym = 1;
3096   else                      mumps->sym = 2;
3097 #endif
3098 
3099   /* set solvertype */
3100   PetscCall(PetscFree(B->solvertype));
3101   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3102   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3103   if (size == 1) {
3104     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3105     B->canuseordering = PETSC_TRUE;
3106   }
3107   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3108   B->ops->destroy = MatDestroy_MUMPS;
3109   B->data         = (void*)mumps;
3110 
3111   PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A));
3112 
3113   *F = B;
3114   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3115   PetscFunctionReturn(0);
3116 }
3117 
3118 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3119 {
3120   Mat            B;
3121   Mat_MUMPS      *mumps;
3122   PetscBool      isSeqBAIJ;
3123   PetscMPIInt    size;
3124 
3125   PetscFunctionBegin;
3126   /* Create the factorization matrix */
3127   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ));
3128   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3129   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3130   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3131   PetscCall(MatSetUp(B));
3132 
3133   PetscCall(PetscNewLog(B,&mumps));
3134   if (ftype == MAT_FACTOR_LU) {
3135     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3136     B->factortype            = MAT_FACTOR_LU;
3137     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3138     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3139     mumps->sym = 0;
3140     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3141   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3142 
3143   B->ops->view        = MatView_MUMPS;
3144   B->ops->getinfo     = MatGetInfo_MUMPS;
3145 
3146   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3147   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3148   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3149   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3150   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3151   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3152   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3153   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3154   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3155   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3156   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3157   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3158   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3159 
3160   /* set solvertype */
3161   PetscCall(PetscFree(B->solvertype));
3162   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3163   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3164   if (size == 1) {
3165     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3166     B->canuseordering = PETSC_TRUE;
3167   }
3168   B->ops->destroy = MatDestroy_MUMPS;
3169   B->data         = (void*)mumps;
3170 
3171   PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A));
3172 
3173   *F = B;
3174   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3175   PetscFunctionReturn(0);
3176 }
3177 
3178 /* MatGetFactor for Seq and MPI SELL matrices */
3179 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3180 {
3181   Mat            B;
3182   Mat_MUMPS      *mumps;
3183   PetscBool      isSeqSELL;
3184   PetscMPIInt    size;
3185 
3186   PetscFunctionBegin;
3187   /* Create the factorization matrix */
3188   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL));
3189   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3190   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3191   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3192   PetscCall(MatSetUp(B));
3193 
3194   PetscCall(PetscNewLog(B,&mumps));
3195 
3196   B->ops->view        = MatView_MUMPS;
3197   B->ops->getinfo     = MatGetInfo_MUMPS;
3198 
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3202   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3203   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3204   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3205   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3206   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3207   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3208   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3209   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3210 
3211   if (ftype == MAT_FACTOR_LU) {
3212     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3213     B->factortype            = MAT_FACTOR_LU;
3214     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3215     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3216     mumps->sym = 0;
3217     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3218   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3219 
3220   /* set solvertype */
3221   PetscCall(PetscFree(B->solvertype));
3222   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3223   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3224   if (size == 1) {
3225     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3226     B->canuseordering = PETSC_TRUE;
3227   }
3228   B->ops->destroy = MatDestroy_MUMPS;
3229   B->data         = (void*)mumps;
3230 
3231   PetscCall(MatSetFromOptions_MUMPS_OpenMP(B,A));
3232 
3233   *F = B;
3234   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3235   PetscFunctionReturn(0);
3236 }
3237 
3238 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3239 {
3240   PetscFunctionBegin;
3241   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3242   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3243   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3244   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3245   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3246   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3247   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3248   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3249   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3250   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3251   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps));
3252   PetscFunctionReturn(0);
3253 }
3254