xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision ef46b1a67e276116c83b5d4ce8efc2932ea4fc0a)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
47    numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention of PetscMPIInt, PetscBLASInt, etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
54     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
58     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #endif
61 
62 #define MPIU_MUMPSINT             MPI_INT
63 #define PETSC_MUMPS_INT_MAX       2147483647
64 #define PETSC_MUMPS_INT_MIN       -2147483648
65 
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
67 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69   PetscFunctionBegin;
70 #if PetscDefined(USE_64BIT_INDICES)
71   PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
72 #endif
73   *b = (PetscMUMPSInt)(a);
74   PetscFunctionReturn(0);
75 }
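/* Illustrative usage sketch (not code from this file): guard every narrowing of a PetscInt before
   handing it to MUMPS, e.g.

     PetscMUMPSInt n;
     PetscCall(PetscMUMPSIntCast(A->rmap->N,&n));  // errors out if the value does not fit in 32 bits
     mumps->id.n = n;

   With 32-bit PetscInt the range check compiles away and the cast becomes a plain assignment. */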
76 
77 /* Put these utility routines here since they are only used in this file */
78 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
79 {
80   PetscInt       myval;
81   PetscBool      myset;
82   PetscFunctionBegin;
83   /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
84   PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
85   if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
86   if (set) *set = myset;
87   PetscFunctionReturn(0);
88 }
89 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
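/* Illustrative usage sketch (assumes a surrounding PetscOptionsBegin()/PetscOptionsEnd() block, as in
   the MatSetFromOptions path later in this file):

     PetscMUMPSInt icntl;
     PetscBool     flg;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering","None",mumps->id.ICNTL(7),&icntl,&flg));
     if (flg) mumps->id.ICNTL(7) = icntl;
*/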
90 
91 /* If using PETSc OpenMP support, we call MUMPS only on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
92 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
93 #define PetscMUMPS_c(mumps) \
94   do { \
95     if (mumps->use_petsc_omp_support) { \
96       if (mumps->is_omp_master) { \
97         PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
98         MUMPS_c(&mumps->id); \
99         PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
100       } \
101       PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
102       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
103          to each process, so we only Bcast info[1] (an error code) and leave the others (since they do not \
104          have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p. 82.     \
105          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
106       */ \
107       PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
108       PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
109       PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
110     } else { \
111       MUMPS_c(&mumps->id); \
112     } \
113   } while (0)
114 #else
115 #define PetscMUMPS_c(mumps) \
116   do { MUMPS_c(&mumps->id); } while (0)
117 #endif
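/* Typical call pattern used throughout this file (a sketch): set id.job, invoke the wrapper, then
   check the MUMPS return code, e.g.

     mumps->id.job = JOB_FACTNUMERIC;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
*/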
118 
119 /* declare MumpsScalar */
120 #if defined(PETSC_USE_COMPLEX)
121 #if defined(PETSC_USE_REAL_SINGLE)
122 #define MumpsScalar mumps_complex
123 #else
124 #define MumpsScalar mumps_double_complex
125 #endif
126 #else
127 #define MumpsScalar PetscScalar
128 #endif
129 
130 /* macros so that indices match the MUMPS documentation */
131 #define ICNTL(I) icntl[(I)-1]
132 #define CNTL(I) cntl[(I)-1]
133 #define INFOG(I) infog[(I)-1]
134 #define INFO(I) info[(I)-1]
135 #define RINFOG(I) rinfog[(I)-1]
136 #define RINFO(I) rinfo[(I)-1]
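/* Example: mumps->id.ICNTL(24) = 1 reads/writes id.icntl[23], so the code below can use the same
   1-based numbering as the MUMPS User's Guide */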
137 
138 typedef struct Mat_MUMPS Mat_MUMPS;
139 struct Mat_MUMPS {
140 #if defined(PETSC_USE_COMPLEX)
141 #if defined(PETSC_USE_REAL_SINGLE)
142   CMUMPS_STRUC_C id;
143 #else
144   ZMUMPS_STRUC_C id;
145 #endif
146 #else
147 #if defined(PETSC_USE_REAL_SINGLE)
148   SMUMPS_STRUC_C id;
149 #else
150   DMUMPS_STRUC_C id;
151 #endif
152 #endif
153 
154   MatStructure   matstruc;
155   PetscMPIInt    myid,petsc_size;
156   PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
157   PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
158   PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
159   PetscMUMPSInt  sym;
160   MPI_Comm       mumps_comm;
161   PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
162   VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
163   PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
164   PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
165 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
166   PetscInt       *rhs_nrow,max_nrhs;
167   PetscMPIInt    *rhs_recvcounts,*rhs_disps;
168   PetscScalar    *rhs_loc,*rhs_recvbuf;
169 #endif
170   Vec            b_seq,x_seq;
171   PetscInt       ninfo,*info;           /* which INFO to display */
172   PetscInt       sizeredrhs;
173   PetscScalar    *schur_sol;
174   PetscInt       schur_sizesol;
175   PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
176   PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
177   PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
178 
179   /* stuff used by petsc/mumps OpenMP support*/
180   PetscBool      use_petsc_omp_support;
181   PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller that guarantees blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
182   MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
183   PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
184   PetscMPIInt    tag,omp_comm_size;
185   PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
186   MPI_Request    *reqs;
187 };
188 
189 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
190    Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
191  */
192 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
193 {
194   PetscInt       nnz=ia[nrow]-1; /* MUMPS uses 1-based indices. Use PetscInt instead of PetscInt64 since MUMPS only uses PetscMUMPSInt for the RHS */
195 
196   PetscFunctionBegin;
197 #if defined(PETSC_USE_64BIT_INDICES)
198   {
199     PetscInt i;
200     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
201       PetscCall(PetscFree(mumps->ia_alloc));
202       PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
203       mumps->cur_ilen = nrow+1;
204     }
205     if (nnz > mumps->cur_jlen) {
206       PetscCall(PetscFree(mumps->ja_alloc));
207       PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
208       mumps->cur_jlen = nnz;
209     }
210     for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
211     for (i=0; i<nnz; i++)    PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
212     *ia_mumps = mumps->ia_alloc;
213     *ja_mumps = mumps->ja_alloc;
214   }
215 #else
216   *ia_mumps = ia;
217   *ja_mumps = ja;
218 #endif
219   PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
220   PetscFunctionReturn(0);
221 }
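/* Illustrative usage sketch (mirrors the sparse-RHS path in MatMatSolve_MUMPS() below): given a
   1-based CSR (spnr, ia, ja) obtained from MatGetRowIJ() with shift 1, hand it to MUMPS as a
   sparse right-hand side:

     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));

   With 32-bit PetscInt this just aliases ia/ja; with 64-bit PetscInt it copies into
   mumps->ia_alloc/ja_alloc after range-checking every entry. */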
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224 {
225   PetscFunctionBegin;
226   PetscCall(PetscFree(mumps->id.listvar_schur));
227   PetscCall(PetscFree(mumps->id.redrhs));
228   PetscCall(PetscFree(mumps->schur_sol));
229   mumps->id.size_schur = 0;
230   mumps->id.schur_lld  = 0;
231   mumps->id.ICNTL(19)  = 0;
232   PetscFunctionReturn(0);
233 }
234 
235 /* solve with rhs in mumps->id.redrhs and return in the same location */
236 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
237 {
238   Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
239   Mat                  S,B,X;
240   MatFactorSchurStatus schurstatus;
241   PetscInt             sizesol;
242 
243   PetscFunctionBegin;
244   PetscCall(MatFactorFactorizeSchurComplement(F));
245   PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
246   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
247   PetscCall(MatSetType(B,((PetscObject)S)->type_name));
248 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
249   PetscCall(MatBindToCPU(B,S->boundtocpu));
250 #endif
251   switch (schurstatus) {
252   case MAT_FACTOR_SCHUR_FACTORED:
253     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
254     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
255 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
256     PetscCall(MatBindToCPU(X,S->boundtocpu));
257 #endif
258     if (!mumps->id.ICNTL(9)) { /* transpose solve */
259       PetscCall(MatMatSolveTranspose(S,B,X));
260     } else {
261       PetscCall(MatMatSolve(S,B,X));
262     }
263     break;
264   case MAT_FACTOR_SCHUR_INVERTED:
265     sizesol = mumps->id.nrhs*mumps->id.size_schur;
266     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
267       PetscCall(PetscFree(mumps->schur_sol));
268       PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
269       mumps->schur_sizesol = sizesol;
270     }
271     PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
272     PetscCall(MatSetType(X,((PetscObject)S)->type_name));
273 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
274     PetscCall(MatBindToCPU(X,S->boundtocpu));
275 #endif
276     PetscCall(MatProductCreateWithMat(S,B,NULL,X));
277     if (!mumps->id.ICNTL(9)) { /* transpose solve */
278       PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
279     } else {
280       PetscCall(MatProductSetType(X,MATPRODUCT_AB));
281     }
282     PetscCall(MatProductSetFromOptions(X));
283     PetscCall(MatProductSymbolic(X));
284     PetscCall(MatProductNumeric(X));
285 
286     PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
287     break;
288   default:
289     SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
290   }
291   PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
292   PetscCall(MatDestroy(&B));
293   PetscCall(MatDestroy(&X));
294   PetscFunctionReturn(0);
295 }
296 
297 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
298 {
299   Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
300 
301   PetscFunctionBegin;
302   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
303     PetscFunctionReturn(0);
304   }
305   if (!expansion) { /* prepare for the condensation step */
306     PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
307     /* allocate MUMPS internal array to store reduced right-hand sides */
308     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
309       PetscCall(PetscFree(mumps->id.redrhs));
310       mumps->id.lredrhs = mumps->id.size_schur;
311       PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
312       mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
313     }
314     mumps->id.ICNTL(26) = 1; /* condensation phase */
315   } else { /* prepare for the expansion step */
316     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
317     PetscCall(MatMumpsSolveSchur_Private(F));
318     mumps->id.ICNTL(26) = 2; /* expansion phase */
319     PetscMUMPS_c(mumps);
320     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
321     /* restore defaults */
322     mumps->id.ICNTL(26) = -1;
323     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
324     if (mumps->id.nrhs > 1) {
325       PetscCall(PetscFree(mumps->id.redrhs));
326       mumps->id.lredrhs = 0;
327       mumps->sizeredrhs = 0;
328     }
329   }
330   PetscFunctionReturn(0);
331 }
332 
333 /*
334   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
335 
336   input:
337     A       - matrix in aij,baij or sbaij format
338     shift   - 0: C style output triple; 1: Fortran style output triple.
339     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
340               MAT_REUSE_MATRIX:   only the values in v array are updated
341   output:
342     nnz     - dim of r, c, and v (number of local nonzero entries of A)
343     r, c, v - row and col index, matrix values (matrix triples)
344 
345 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(); in MatDestroy_MUMPS() that allocation is
346 freed with PetscFree(mumps->irn). This is not ideal code: the fact that v is ONLY sometimes part of mumps->irn means
347 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
348 
349  */
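/* Worked example (illustration only): with shift = 1 (the Fortran-style indexing MUMPS expects),
   the 2x2 upper-triangular AIJ matrix
       [ 2  3 ]
       [ 0  4 ]
   is converted to the triples
       irn = {1, 1, 2},  jcn = {1, 2, 2},  val = {2, 3, 4},  nnz = 3.
   With MAT_REUSE_MATRIX only the values are refreshed; irn/jcn keep the pattern from the first call. */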
350 
351 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
352 {
353   const PetscScalar *av;
354   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
355   PetscInt64        nz,rnz,i,j,k;
356   PetscMUMPSInt     *row,*col;
357   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
358 
359   PetscFunctionBegin;
360   PetscCall(MatSeqAIJGetArrayRead(A,&av));
361   mumps->val = (PetscScalar*)av;
362   if (reuse == MAT_INITIAL_MATRIX) {
363     nz   = aa->nz;
364     ai   = aa->i;
365     aj   = aa->j;
366     PetscCall(PetscMalloc2(nz,&row,nz,&col));
367     for (i=k=0; i<M; i++) {
368       rnz = ai[i+1] - ai[i];
369       ajj = aj + ai[i];
370       for (j=0; j<rnz; j++) {
371         PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
372         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
373         k++;
374       }
375     }
376     mumps->irn = row;
377     mumps->jcn = col;
378     mumps->nnz = nz;
379   }
380   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
381   PetscFunctionReturn(0);
382 }
383 
384 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
385 {
386   PetscInt64     nz,i,j,k,r;
387   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
388   PetscMUMPSInt  *row,*col;
389 
390   PetscFunctionBegin;
391   mumps->val = a->val;
392   if (reuse == MAT_INITIAL_MATRIX) {
393     nz   = a->sliidx[a->totalslices];
394     PetscCall(PetscMalloc2(nz,&row,nz,&col));
395     for (i=k=0; i<a->totalslices; i++) {
396       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
397         PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
398       }
399     }
400     for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
401     mumps->irn = row;
402     mumps->jcn = col;
403     mumps->nnz = nz;
404   }
405   PetscFunctionReturn(0);
406 }
407 
408 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
409 {
410   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
411   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
412   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
413   PetscInt       bs;
414   PetscMUMPSInt  *row,*col;
415 
416   PetscFunctionBegin;
417   PetscCall(MatGetBlockSize(A,&bs));
418   M          = A->rmap->N/bs;
419   mumps->val = aa->a;
420   if (reuse == MAT_INITIAL_MATRIX) {
421     ai   = aa->i; aj = aa->j;
422     nz   = bs2*aa->nz;
423     PetscCall(PetscMalloc2(nz,&row,nz,&col));
424     for (i=0; i<M; i++) {
425       ajj = aj + ai[i];
426       rnz = ai[i+1] - ai[i];
427       for (k=0; k<rnz; k++) {
428         for (j=0; j<bs; j++) {
429           for (m=0; m<bs; m++) {
430             PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
431             PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
432             idx++;
433           }
434         }
435       }
436     }
437     mumps->irn = row;
438     mumps->jcn = col;
439     mumps->nnz = nz;
440   }
441   PetscFunctionReturn(0);
442 }
443 
444 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
445 {
446   const PetscInt *ai, *aj,*ajj;
447   PetscInt        bs;
448   PetscInt64      nz,rnz,i,j,k,m;
449   PetscMUMPSInt   *row,*col;
450   PetscScalar     *val;
451   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
452   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
453 #if defined(PETSC_USE_COMPLEX)
454   PetscBool       hermitian;
455 #endif
456 
457   PetscFunctionBegin;
458 #if defined(PETSC_USE_COMPLEX)
459   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
460   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
461 #endif
462   ai   = aa->i;
463   aj   = aa->j;
464   PetscCall(MatGetBlockSize(A,&bs));
465   if (reuse == MAT_INITIAL_MATRIX) {
466     nz   = aa->nz;
467     PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
468     if (bs>1) {
469       PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
470       mumps->val = mumps->val_alloc;
471     } else {
472       mumps->val = aa->a;
473     }
474     mumps->irn = row;
475     mumps->jcn = col;
476   } else {
477     if (bs == 1) mumps->val = aa->a;
478     row = mumps->irn;
479     col = mumps->jcn;
480   }
481   val = mumps->val;
482 
483   nz = 0;
484   if (bs>1) {
485     for (i=0; i<mbs; i++) {
486       rnz = ai[i+1] - ai[i];
487       ajj = aj + ai[i];
488       for (j=0; j<rnz; j++) {
489         for (k=0; k<bs; k++) {
490           for (m=0; m<bs; m++) {
491             if (ajj[j]>i || k>=m) {
492               if (reuse == MAT_INITIAL_MATRIX) {
493                 PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
494                 PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
495               }
496               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
497             }
498           }
499         }
500       }
501     }
502   } else if (reuse == MAT_INITIAL_MATRIX) {
503     for (i=0; i<mbs; i++) {
504       rnz = ai[i+1] - ai[i];
505       ajj = aj + ai[i];
506       for (j=0; j<rnz; j++) {
507         PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
508         PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
509         nz++;
510       }
511     }
512     PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
513   }
514   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
515   PetscFunctionReturn(0);
516 }
517 
518 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
519 {
520   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
521   PetscInt64        nz,rnz,i,j;
522   const PetscScalar *av,*v1;
523   PetscScalar       *val;
524   PetscMUMPSInt     *row,*col;
525   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
526   PetscBool         missing;
527 #if defined(PETSC_USE_COMPLEX)
528   PetscBool         hermitian;
529 #endif
530 
531   PetscFunctionBegin;
532 #if defined(PETSC_USE_COMPLEX)
533   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
534   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
535 #endif
536   PetscCall(MatSeqAIJGetArrayRead(A,&av));
537   ai    = aa->i; aj = aa->j;
538   adiag = aa->diag;
539   PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
540   if (reuse == MAT_INITIAL_MATRIX) {
541     /* count nz in the upper triangular part of A */
542     nz = 0;
543     if (missing) {
544       for (i=0; i<M; i++) {
545         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
546           for (j=ai[i];j<ai[i+1];j++) {
547             if (aj[j] < i) continue;
548             nz++;
549           }
550         } else {
551           nz += ai[i+1] - adiag[i];
552         }
553       }
554     } else {
555       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
556     }
557     PetscCall(PetscMalloc2(nz,&row,nz,&col));
558     PetscCall(PetscMalloc1(nz,&val));
559     mumps->nnz = nz;
560     mumps->irn = row;
561     mumps->jcn = col;
562     mumps->val = mumps->val_alloc = val;
563 
564     nz = 0;
565     if (missing) {
566       for (i=0; i<M; i++) {
567         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
568           for (j=ai[i];j<ai[i+1];j++) {
569             if (aj[j] < i) continue;
570             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
571             PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
572             val[nz] = av[j];
573             nz++;
574           }
575         } else {
576           rnz = ai[i+1] - adiag[i];
577           ajj = aj + adiag[i];
578           v1  = av + adiag[i];
579           for (j=0; j<rnz; j++) {
580             PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
581             PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
582             val[nz++] = v1[j];
583           }
584         }
585       }
586     } else {
587       for (i=0; i<M; i++) {
588         rnz = ai[i+1] - adiag[i];
589         ajj = aj + adiag[i];
590         v1  = av + adiag[i];
591         for (j=0; j<rnz; j++) {
592           PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
593           PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
594           val[nz++] = v1[j];
595         }
596       }
597     }
598   } else {
599     nz = 0;
600     val = mumps->val;
601     if (missing) {
602       for (i=0; i <M; i++) {
603         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
604           for (j=ai[i];j<ai[i+1];j++) {
605             if (aj[j] < i) continue;
606             val[nz++] = av[j];
607           }
608         } else {
609           rnz = ai[i+1] - adiag[i];
610           v1  = av + adiag[i];
611           for (j=0; j<rnz; j++) {
612             val[nz++] = v1[j];
613           }
614         }
615       }
616     } else {
617       for (i=0; i <M; i++) {
618         rnz = ai[i+1] - adiag[i];
619         v1  = av + adiag[i];
620         for (j=0; j<rnz; j++) {
621           val[nz++] = v1[j];
622         }
623       }
624     }
625   }
626   PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
627   PetscFunctionReturn(0);
628 }
629 
630 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
631 {
632   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
633   PetscInt          bs;
634   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
635   PetscMUMPSInt     *row,*col;
636   const PetscScalar *av,*bv,*v1,*v2;
637   PetscScalar       *val;
638   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
639   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
640   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
641   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
642 #if defined(PETSC_USE_COMPLEX)
643   PetscBool         hermitian;
644 #endif
645 
646   PetscFunctionBegin;
647 #if defined(PETSC_USE_COMPLEX)
648   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
649   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
650 #endif
651   PetscCall(MatGetBlockSize(A,&bs));
652   rstart = A->rmap->rstart;
653   ai = aa->i;
654   aj = aa->j;
655   bi = bb->i;
656   bj = bb->j;
657   av = aa->a;
658   bv = bb->a;
659 
660   garray = mat->garray;
661 
662   if (reuse == MAT_INITIAL_MATRIX) {
663     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
664     PetscCall(PetscMalloc2(nz,&row,nz,&col));
665     PetscCall(PetscMalloc1(nz,&val));
666     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
667     mumps->irn = row;
668     mumps->jcn = col;
669     mumps->val = mumps->val_alloc = val;
670   } else {
671     val = mumps->val;
672   }
673 
674   jj = 0; irow = rstart;
675   for (i=0; i<mbs; i++) {
676     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
677     countA = ai[i+1] - ai[i];
678     countB = bi[i+1] - bi[i];
679     bjj    = bj + bi[i];
680     v1     = av + ai[i]*bs2;
681     v2     = bv + bi[i]*bs2;
682 
683     if (bs>1) {
684       /* A-part */
685       for (j=0; j<countA; j++) {
686         for (k=0; k<bs; k++) {
687           for (m=0; m<bs; m++) {
688             if (rstart + ajj[j]*bs>irow || k>=m) {
689               if (reuse == MAT_INITIAL_MATRIX) {
690                 PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
691                 PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
692               }
693               val[jj++] = v1[j*bs2 + m + k*bs];
694             }
695           }
696         }
697       }
698 
699       /* B-part */
700       for (j=0; j < countB; j++) {
701         for (k=0; k<bs; k++) {
702           for (m=0; m<bs; m++) {
703             if (reuse == MAT_INITIAL_MATRIX) {
704               PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
705               PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
706             }
707             val[jj++] = v2[j*bs2 + m + k*bs];
708           }
709         }
710       }
711     } else {
712       /* A-part */
713       for (j=0; j<countA; j++) {
714         if (reuse == MAT_INITIAL_MATRIX) {
715           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
716           PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
717         }
718         val[jj++] = v1[j];
719       }
720 
721       /* B-part */
722       for (j=0; j < countB; j++) {
723         if (reuse == MAT_INITIAL_MATRIX) {
724           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
725           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
726         }
727         val[jj++] = v2[j];
728       }
729     }
730     irow+=bs;
731   }
732   mumps->nnz = jj;
733   PetscFunctionReturn(0);
734 }
735 
736 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
737 {
738   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
739   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
740   PetscMUMPSInt     *row,*col;
741   const PetscScalar *av, *bv,*v1,*v2;
742   PetscScalar       *val;
743   Mat               Ad,Ao;
744   Mat_SeqAIJ        *aa;
745   Mat_SeqAIJ        *bb;
746 
747   PetscFunctionBegin;
748   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
749   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
750   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
751 
752   aa = (Mat_SeqAIJ*)(Ad)->data;
753   bb = (Mat_SeqAIJ*)(Ao)->data;
754   ai = aa->i;
755   aj = aa->j;
756   bi = bb->i;
757   bj = bb->j;
758 
759   rstart = A->rmap->rstart;
760 
761   if (reuse == MAT_INITIAL_MATRIX) {
762     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
763     PetscCall(PetscMalloc2(nz,&row,nz,&col));
764     PetscCall(PetscMalloc1(nz,&val));
765     mumps->nnz = nz;
766     mumps->irn = row;
767     mumps->jcn = col;
768     mumps->val = mumps->val_alloc = val;
769   } else {
770     val = mumps->val;
771   }
772 
773   jj = 0; irow = rstart;
774   for (i=0; i<m; i++) {
775     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
776     countA = ai[i+1] - ai[i];
777     countB = bi[i+1] - bi[i];
778     bjj    = bj + bi[i];
779     v1     = av + ai[i];
780     v2     = bv + bi[i];
781 
782     /* A-part */
783     for (j=0; j<countA; j++) {
784       if (reuse == MAT_INITIAL_MATRIX) {
785         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
786         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
787       }
788       val[jj++] = v1[j];
789     }
790 
791     /* B-part */
792     for (j=0; j < countB; j++) {
793       if (reuse == MAT_INITIAL_MATRIX) {
794         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
795         PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
796       }
797       val[jj++] = v2[j];
798     }
799     irow++;
800   }
801   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
802   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
803   PetscFunctionReturn(0);
804 }
805 
806 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
807 {
808   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
809   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
810   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
811   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
812   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
813   const PetscInt    bs2=mat->bs2;
814   PetscInt          bs;
815   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
816   PetscMUMPSInt     *row,*col;
817   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
818   PetscScalar       *val;
819 
820   PetscFunctionBegin;
821   PetscCall(MatGetBlockSize(A,&bs));
822   if (reuse == MAT_INITIAL_MATRIX) {
823     nz   = bs2*(aa->nz + bb->nz);
824     PetscCall(PetscMalloc2(nz,&row,nz,&col));
825     PetscCall(PetscMalloc1(nz,&val));
826     mumps->nnz = nz;
827     mumps->irn = row;
828     mumps->jcn = col;
829     mumps->val = mumps->val_alloc = val;
830   } else {
831     val = mumps->val;
832   }
833 
834   jj = 0; irow = rstart;
835   for (i=0; i<mbs; i++) {
836     countA = ai[i+1] - ai[i];
837     countB = bi[i+1] - bi[i];
838     ajj    = aj + ai[i];
839     bjj    = bj + bi[i];
840     v1     = av + bs2*ai[i];
841     v2     = bv + bs2*bi[i];
842 
843     idx = 0;
844     /* A-part */
845     for (k=0; k<countA; k++) {
846       for (j=0; j<bs; j++) {
847         for (n=0; n<bs; n++) {
848           if (reuse == MAT_INITIAL_MATRIX) {
849             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
850             PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
851           }
852           val[jj++] = v1[idx++];
853         }
854       }
855     }
856 
857     idx = 0;
858     /* B-part */
859     for (k=0; k<countB; k++) {
860       for (j=0; j<bs; j++) {
861         for (n=0; n<bs; n++) {
862           if (reuse == MAT_INITIAL_MATRIX) {
863             PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
864             PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
865           }
866           val[jj++] = v2[idx++];
867         }
868       }
869     }
870     irow += bs;
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
876 {
877   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
878   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
879   PetscMUMPSInt     *row,*col;
880   const PetscScalar *av, *bv,*v1,*v2;
881   PetscScalar       *val;
882   Mat               Ad,Ao;
883   Mat_SeqAIJ        *aa;
884   Mat_SeqAIJ        *bb;
885 #if defined(PETSC_USE_COMPLEX)
886   PetscBool         hermitian;
887 #endif
888 
889   PetscFunctionBegin;
890 #if defined(PETSC_USE_COMPLEX)
891   PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
892   PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
893 #endif
894   PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
895   PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
896   PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));
897 
898   aa    = (Mat_SeqAIJ*)(Ad)->data;
899   bb    = (Mat_SeqAIJ*)(Ao)->data;
900   ai    = aa->i;
901   aj    = aa->j;
902   adiag = aa->diag;
903   bi    = bb->i;
904   bj    = bb->j;
905 
906   rstart = A->rmap->rstart;
907 
908   if (reuse == MAT_INITIAL_MATRIX) {
909     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
910     nzb = 0;    /* num of upper triangular entries in mat->B */
911     for (i=0; i<m; i++) {
912       nza   += (ai[i+1] - adiag[i]);
913       countB = bi[i+1] - bi[i];
914       bjj    = bj + bi[i];
915       for (j=0; j<countB; j++) {
916         if (garray[bjj[j]] > rstart) nzb++;
917       }
918     }
919 
920     nz   = nza + nzb; /* total nz of upper triangular part of mat */
921     PetscCall(PetscMalloc2(nz,&row,nz,&col));
922     PetscCall(PetscMalloc1(nz,&val));
923     mumps->nnz = nz;
924     mumps->irn = row;
925     mumps->jcn = col;
926     mumps->val = mumps->val_alloc = val;
927   } else {
928     val = mumps->val;
929   }
930 
931   jj = 0; irow = rstart;
932   for (i=0; i<m; i++) {
933     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
934     v1     = av + adiag[i];
935     countA = ai[i+1] - adiag[i];
936     countB = bi[i+1] - bi[i];
937     bjj    = bj + bi[i];
938     v2     = bv + bi[i];
939 
940     /* A-part */
941     for (j=0; j<countA; j++) {
942       if (reuse == MAT_INITIAL_MATRIX) {
943         PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
944         PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
945       }
946       val[jj++] = v1[j];
947     }
948 
949     /* B-part */
950     for (j=0; j < countB; j++) {
951       if (garray[bjj[j]] > rstart) {
952         if (reuse == MAT_INITIAL_MATRIX) {
953           PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
954           PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
955         }
956         val[jj++] = v2[j];
957       }
958     }
959     irow++;
960   }
961   PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
962   PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
963   PetscFunctionReturn(0);
964 }
965 
966 PetscErrorCode MatDestroy_MUMPS(Mat A)
967 {
968   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
969 
970   PetscFunctionBegin;
971   PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
972   PetscCall(VecScatterDestroy(&mumps->scat_rhs));
973   PetscCall(VecScatterDestroy(&mumps->scat_sol));
974   PetscCall(VecDestroy(&mumps->b_seq));
975   PetscCall(VecDestroy(&mumps->x_seq));
976   PetscCall(PetscFree(mumps->id.perm_in));
977   PetscCall(PetscFree2(mumps->irn,mumps->jcn));
978   PetscCall(PetscFree(mumps->val_alloc));
979   PetscCall(PetscFree(mumps->info));
980   PetscCall(MatMumpsResetSchur_Private(mumps));
981   mumps->id.job = JOB_END;
982   PetscMUMPS_c(mumps);
983   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
984 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
985   if (mumps->use_petsc_omp_support) {
986     PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
987     PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
988     PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
989   }
990 #endif
991   PetscCall(PetscFree(mumps->ia_alloc));
992   PetscCall(PetscFree(mumps->ja_alloc));
993   PetscCall(PetscFree(mumps->recvcount));
994   PetscCall(PetscFree(mumps->reqs));
995   PetscCall(PetscFree(mumps->irhs_loc));
996   if (mumps->mumps_comm != MPI_COMM_NULL) PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
997   PetscCall(PetscFree(A->data));
998 
999   /* clear composed functions */
1000   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
1001   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
1002   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
1003   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
1004   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
1005   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
1006   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
1007   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
1008   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
1009   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
1010   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
1011   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
1012   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
1013   PetscFunctionReturn(0);
1014 }
1015 
1016 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
1017 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1018 {
1019   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1020   const PetscMPIInt  ompsize=mumps->omp_comm_size;
1021   PetscInt           i,m,M,rstart;
1022 
1023   PetscFunctionBegin;
1024   PetscCall(MatGetSize(A,&M,NULL));
1025   PetscCall(MatGetLocalSize(A,&m,NULL));
1026   PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1027   if (ompsize == 1) {
1028     if (!mumps->irhs_loc) {
1029       mumps->nloc_rhs = m;
1030       PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
1031       PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
1032       for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1033     }
1034     mumps->id.rhs_loc = (MumpsScalar*)array;
1035   } else {
1036   #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1037     const PetscInt  *ranges;
1038     PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
1039     MPI_Group       petsc_group,omp_group;
1040     PetscScalar     *recvbuf=NULL;
1041 
1042     if (mumps->is_omp_master) {
1043       /* Lazily initialize the omp stuff for distributed rhs */
1044       if (!mumps->irhs_loc) {
1045         PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
1046         PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
1047         PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
1048         PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
1049         for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1050         PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));
1051 
1052         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1053         mumps->nloc_rhs = 0;
1054         PetscCall(MatGetOwnershipRanges(A,&ranges));
1055         for (j=0; j<ompsize; j++) {
1056           mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1057           mumps->nloc_rhs   += mumps->rhs_nrow[j];
1058         }
1059         PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
1060         for (j=k=0; j<ompsize; j++) {
1061           for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1062         }
1063 
1064         PetscCall(PetscFree2(omp_ranks,petsc_ranks));
1065         PetscCallMPI(MPI_Group_free(&petsc_group));
1066         PetscCallMPI(MPI_Group_free(&omp_group));
1067       }
1068 
1069       /* Realloc buffers when the current nrhs is bigger than any we have seen so far */
1070       if (nrhs > mumps->max_nrhs) {
1071         PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
1072         PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
1073         mumps->max_nrhs = nrhs;
1074       }
1075 
1076       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1077       for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
1078       mumps->rhs_disps[0] = 0;
1079       for (j=1; j<ompsize; j++) {
1080         mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1081         PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1082       }
1083       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1084     }
1085 
1086     PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
1087     PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));
1088 
1089     if (mumps->is_omp_master) {
1090       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1091         PetscScalar *dst,*dstbase = mumps->rhs_loc;
1092         for (j=0; j<ompsize; j++) {
1093           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1094           dst = dstbase;
1095           for (i=0; i<nrhs; i++) {
1096             PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
1097             src += mumps->rhs_nrow[j];
1098             dst += mumps->nloc_rhs;
1099           }
1100           dstbase += mumps->rhs_nrow[j];
1101         }
1102       }
1103       mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1104     }
1105   #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1106   }
1107   mumps->id.nrhs     = nrhs;
1108   mumps->id.nloc_rhs = mumps->nloc_rhs;
1109   mumps->id.lrhs_loc = mumps->nloc_rhs;
1110   mumps->id.irhs_loc = mumps->irhs_loc;
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1115 {
1116   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1117   const PetscScalar  *rarray = NULL;
1118   PetscScalar        *array;
1119   IS                 is_iden,is_petsc;
1120   PetscInt           i;
1121   PetscBool          second_solve = PETSC_FALSE;
1122   static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1123 
1124   PetscFunctionBegin;
1125   PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1));
1126   PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2));
1127 
1128   if (A->factorerrortype) {
1129     PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1130     PetscCall(VecSetInf(x));
1131     PetscFunctionReturn(0);
1132   }
1133 
1134   mumps->id.nrhs = 1;
1135   if (mumps->petsc_size > 1) {
1136     if (mumps->ICNTL20 == 10) {
1137       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1138       PetscCall(VecGetArrayRead(b,&rarray));
1139       PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
1140     } else {
1141       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
1142       PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1143       PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
1144       if (!mumps->myid) {
1145         PetscCall(VecGetArray(mumps->b_seq,&array));
1146         mumps->id.rhs = (MumpsScalar*)array;
1147       }
1148     }
1149   } else {  /* petsc_size == 1 */
1150     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1151     PetscCall(VecCopy(b,x));
1152     PetscCall(VecGetArray(x,&array));
1153     mumps->id.rhs = (MumpsScalar*)array;
1154   }
1155 
1156   /*
1157      handle condensation step of Schur complement (if any)
1158      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1159      According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
1160      Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
1161      This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
1162   */
1163   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1164     PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1165     second_solve = PETSC_TRUE;
1166     PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1167   }
1168   /* solve phase */
1169   /*-------------*/
1170   mumps->id.job = JOB_SOLVE;
1171   PetscMUMPS_c(mumps);
1172   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1173 
1174   /* handle expansion step of Schur complement (if any) */
1175   if (second_solve) {
1176     PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1177   }
1178 
1179   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1180     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1181       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size lsol_loc), so recreate scat_sol */
1182       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1183     }
1184     if (!mumps->scat_sol) { /* create scatter scat_sol */
1185       PetscInt *isol2_loc=NULL;
1186       PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
1187       PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
1188       for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1189       PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc));  /* to */
1190       PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
1191       PetscCall(ISDestroy(&is_iden));
1192       PetscCall(ISDestroy(&is_petsc));
1193       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1194     }
1195 
1196     PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1197     PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
1198   }
1199 
1200   if (mumps->petsc_size > 1) {
1201     if (mumps->ICNTL20 == 10) {
1202       PetscCall(VecRestoreArrayRead(b,&rarray));
1203     } else if (!mumps->myid) {
1204       PetscCall(VecRestoreArray(mumps->b_seq,&array));
1205     }
1206   } else PetscCall(VecRestoreArray(x,&array));
1207 
1208   PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1213 {
1214   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1215 
1216   PetscFunctionBegin;
1217   mumps->id.ICNTL(9) = 0;
1218   PetscCall(MatSolve_MUMPS(A,b,x));
1219   mumps->id.ICNTL(9) = 1;
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1224 {
1225   Mat               Bt = NULL;
1226   PetscBool         denseX,denseB,flg,flgT;
1227   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1228   PetscInt          i,nrhs,M;
1229   PetscScalar       *array;
1230   const PetscScalar *rbray;
1231   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1232   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1233   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1234   IS                is_to,is_from;
1235   PetscInt          k,proc,j,m,myrstart;
1236   const PetscInt    *rstart;
1237   Vec               v_mpi,msol_loc;
1238   VecScatter        scat_sol;
1239   Vec               b_seq;
1240   VecScatter        scat_rhs;
1241   PetscScalar       *aa;
1242   PetscInt          spnr,*ia,*ja;
1243   Mat_MPIAIJ        *b = NULL;
1244 
1245   PetscFunctionBegin;
1246   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
1247   PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1248 
1249   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
1250   if (denseB) {
1251     PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1252     mumps->id.ICNTL(20)= 0; /* dense RHS */
1253   } else { /* sparse B */
1254     PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1255     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
1256     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1257                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1258       PetscCall(MatTransposeGetMat(B,&Bt));
1259     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1260     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1261   }
1262 
1263   PetscCall(MatGetSize(B,&M,&nrhs));
1264   mumps->id.nrhs = nrhs;
1265   mumps->id.lrhs = M;
1266   mumps->id.rhs  = NULL;
1267 
1268   if (mumps->petsc_size == 1) {
1269     PetscScalar *aa;
1270     PetscInt    spnr,*ia,*ja;
1271     PetscBool   second_solve = PETSC_FALSE;
1272 
1273     PetscCall(MatDenseGetArray(X,&array));
1274     mumps->id.rhs = (MumpsScalar*)array;
1275 
1276     if (denseB) {
1277       /* copy B to X */
1278       PetscCall(MatDenseGetArrayRead(B,&rbray));
1279       PetscCall(PetscArraycpy(array,rbray,M*nrhs));
1280       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1281     } else { /* sparse B */
1282       PetscCall(MatSeqAIJGetArray(Bt,&aa));
1283       PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1284       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1285       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1286       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1287     }
1288     /* handle condensation step of Schur complement (if any) */
1289     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1290       second_solve = PETSC_TRUE;
1291       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1292     }
1293     /* solve phase */
1294     /*-------------*/
1295     mumps->id.job = JOB_SOLVE;
1296     PetscMUMPS_c(mumps);
1297     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1298 
1299     /* handle expansion step of Schur complement (if any) */
1300     if (second_solve) {
1301       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1302     }
1303     if (!denseB) { /* sparse B */
1304       PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
1305       PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1306       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1307     }
1308     PetscCall(MatDenseRestoreArray(X,&array));
1309     PetscFunctionReturn(0);
1310   }
1311 
1312   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1313   PetscCheckFalse(mumps->petsc_size > 1 && mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1314 
1315   /* create msol_loc to hold mumps local solution */
1316   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1317   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1318 
1319   lsol_loc  = mumps->id.lsol_loc;
1320   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1321   PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
1322   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1323   mumps->id.isol_loc = isol_loc;
1324 
1325   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));
1326 
1327   if (denseB) {
1328     if (mumps->ICNTL20 == 10) {
1329       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1330       PetscCall(MatDenseGetArrayRead(B,&rbray));
1331       PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
1332       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1333       PetscCall(MatGetLocalSize(B,&m,NULL));
1334       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
1335     } else {
1336       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1337       /* TODO: Because of non-contiguous indices, the vecscatter scat_rhs created below cannot be implemented with MPI_Gather,
1338         resulting in very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B on rank 0,
1339         then re-arrange B into the desired order on rank 0, which is a local operation.
1340       */
1341 
1342       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1343       /* wrap dense rhs matrix B into a vector v_mpi */
1344       PetscCall(MatGetLocalSize(B,&m,NULL));
1345       PetscCall(MatDenseGetArray(B,&bray));
1346       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1347       PetscCall(MatDenseRestoreArray(B,&bray));
1348 
1349       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1350       if (!mumps->myid) {
1351         PetscInt *idx;
1352         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1353         PetscCall(PetscMalloc1(nrhs*M,&idx));
1354         PetscCall(MatGetOwnershipRanges(B,&rstart));
1355         k = 0;
1356         for (proc=0; proc<mumps->petsc_size; proc++) {
1357           for (j=0; j<nrhs; j++) {
1358             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1359           }
1360         }
1361 
1362         PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
1363         PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
1364         PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
1365       } else {
1366         PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
1367         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
1368         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
1369       }
1370       PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
1371       PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1372       PetscCall(ISDestroy(&is_to));
1373       PetscCall(ISDestroy(&is_from));
1374       PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1375 
1376       if (!mumps->myid) { /* define rhs on the host */
1377         PetscCall(VecGetArray(b_seq,&bray));
1378         mumps->id.rhs = (MumpsScalar*)bray;
1379         PetscCall(VecRestoreArray(b_seq,&bray));
1380       }
1381     }
1382   } else { /* sparse B */
1383     b = (Mat_MPIAIJ*)Bt->data;
1384 
1385     /* wrap dense X into a vector v_mpi */
1386     PetscCall(MatGetLocalSize(X,&m,NULL));
1387     PetscCall(MatDenseGetArray(X,&bray));
1388     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1389     PetscCall(MatDenseRestoreArray(X,&bray));
1390 
1391     if (!mumps->myid) {
1392       PetscCall(MatSeqAIJGetArray(b->A,&aa));
1393       PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1394       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1395       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1396       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1397     } else {
1398       mumps->id.irhs_ptr    = NULL;
1399       mumps->id.irhs_sparse = NULL;
1400       mumps->id.nz_rhs      = 0;
1401       mumps->id.rhs_sparse  = NULL;
1402     }
1403   }
1404 
1405   /* solve phase */
1406   /*-------------*/
1407   mumps->id.job = JOB_SOLVE;
1408   PetscMUMPS_c(mumps);
1409   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1410 
1411   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1412   PetscCall(MatDenseGetArray(X,&array));
1413   PetscCall(VecPlaceArray(v_mpi,array));
1414 
1415   /* create scatter scat_sol */
1416   PetscCall(MatGetOwnershipRanges(X,&rstart));
1417   /* idxx: indices used to scatter the mumps solution (in isol_loc ordering) into the petsc matrix X */
1418 
1419   PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
1420   PetscCall(PetscMalloc1(nlsol_loc,&idxx));
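  /* v_mpi wraps the column-major local array of X: the entries owned by rank proc start at global offset
     rstart[proc]*nrhs, and within that rank column j of X occupies a contiguous block of length
     m = rstart[proc+1]-rstart[proc]. The loop below maps each isol_loc[i] into this layout. */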
1421   for (i=0; i<lsol_loc; i++) {
1422     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1423 
1424     for (proc=0; proc<mumps->petsc_size; proc++) {
1425       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1426         myrstart = rstart[proc];
1427         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1428         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1429         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1430         break;
1431       }
1432     }
1433 
1434     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1435   }
1436   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
1437   PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
1438   PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1439   PetscCall(ISDestroy(&is_from));
1440   PetscCall(ISDestroy(&is_to));
1441   PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1442   PetscCall(MatDenseRestoreArray(X,&array));
1443 
1444   /* free spaces */
1445   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1446   mumps->id.isol_loc = isol_loc_save;
1447 
1448   PetscCall(PetscFree2(sol_loc,isol_loc));
1449   PetscCall(PetscFree(idxx));
1450   PetscCall(VecDestroy(&msol_loc));
1451   PetscCall(VecDestroy(&v_mpi));
1452   if (!denseB) {
1453     if (!mumps->myid) {
1454       b = (Mat_MPIAIJ*)Bt->data;
1455       PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
1456       PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1457       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1458     }
1459   } else {
1460     if (mumps->ICNTL20 == 0) {
1461       PetscCall(VecDestroy(&b_seq));
1462       PetscCall(VecScatterDestroy(&scat_rhs));
1463     }
1464   }
1465   PetscCall(VecScatterDestroy(&scat_sol));
1466   PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1471 {
1472   PetscBool      flg;
1473   Mat            B;
1474 
1475   PetscFunctionBegin;
1476   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
1477   PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");
1478 
1479   /* Create B=Bt^T that uses Bt's data structure */
1480   PetscCall(MatCreateTranspose(Bt,&B));
1481 
1482   PetscCall(MatMatSolve_MUMPS(A,B,X));
1483   PetscCall(MatDestroy(&B));
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #if !defined(PETSC_USE_COMPLEX)
1488 /*
1489   input:
1490    F:        numeric factor
1491   output:
1492    nneg:     total number of negative pivots
1493    nzero:    total number of zero pivots
1494    npos:     (global dimension of F) - nneg - nzero
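   note:     in parallel runs -mat_mumps_icntl_13 1 is required, and -mat_mumps_icntl_24 1 is additionally
             required when nzero or npos is requested; both requirements are enforced by the checks below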
1495 */
1496 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1497 {
1498   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1499   PetscMPIInt    size;
1500 
1501   PetscFunctionBegin;
1502   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
1503   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1504   PetscCheckFalse(size > 1 && mumps->id.ICNTL(13) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia",mumps->id.ICNTL(13));
1505 
1506   if (nneg) *nneg = mumps->id.INFOG(12);
1507   if (nzero || npos) {
1508     PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1509     if (nzero) *nzero = mumps->id.INFOG(28);
1510     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1511   }
1512   PetscFunctionReturn(0);
1513 }
1514 #endif
1515 
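/* With PETSc OpenMP support (omp_comm_size > 1), gather the COO triples irn/jcn/val from all ranks of omp_comm
   onto the OMP master rank, the only rank that calls MUMPS. With MAT_INITIAL_MATRIX the row/column indices are
   gathered as well; with MAT_REUSE_MATRIX only the numerical values are re-gathered. */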
1516 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1517 {
1518   PetscInt       i,nreqs;
1519   PetscMUMPSInt  *irn,*jcn;
1520   PetscMPIInt    count;
1521   PetscInt64     totnnz,remain;
1522   const PetscInt osize=mumps->omp_comm_size;
1523   PetscScalar    *val;
1524 
1525   PetscFunctionBegin;
1526   if (osize > 1) {
1527     if (reuse == MAT_INITIAL_MATRIX) {
1528       /* master first gathers counts of nonzeros to receive */
1529       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
1530       PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));
1531 
1532       /* Then each rank computes its number of sends/recvs */
1533       if (mumps->is_omp_master) {
1534         /* Start from 1 since self communication is not done via MPI */
1535         nreqs = 0;
1536         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1537       } else {
1538         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1539       }
1540       PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1541 
1542       /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
1543          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1544          might be a prime number > 2^31-1, we have to slice the message. Since omp_comm_size
1545          is very small, the current approach should add no extra overhead compared to MPI_Gatherv.
1546        */
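      /* For example, a rank contributing nnz = 5e9 entries would send them in three chunks of at most PETSC_MPI_INT_MAX entries each */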
1547       nreqs = 0; /* counter for actual send/recvs */
1548       if (mumps->is_omp_master) {
1549         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1550         PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn));
1551         PetscCall(PetscMalloc1(totnnz,&val));
1552 
1553         /* Self communication */
1554         PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz));
1555         PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz));
1556         PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz));
1557 
1558         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1559         PetscCall(PetscFree2(mumps->irn,mumps->jcn));
1560         PetscCall(PetscFree(mumps->val_alloc));
1561         mumps->nnz = totnnz;
1562         mumps->irn = irn;
1563         mumps->jcn = jcn;
1564         mumps->val = mumps->val_alloc = val;
1565 
1566         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1567         jcn += mumps->recvcount[0];
1568         val += mumps->recvcount[0];
1569 
1570         /* Remote communication */
1571         for (i=1; i<osize; i++) {
1572           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1573           remain = mumps->recvcount[i] - count;
1574           while (count>0) {
1575             PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1576             PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1577             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1578             irn    += count;
1579             jcn    += count;
1580             val    += count;
1581             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1582             remain -= count;
1583           }
1584         }
1585       } else {
1586         irn    = mumps->irn;
1587         jcn    = mumps->jcn;
1588         val    = mumps->val;
1589         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1590         remain = mumps->nnz - count;
1591         while (count>0) {
1592           PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1593           PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1594           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1595           irn    += count;
1596           jcn    += count;
1597           val    += count;
1598           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1599           remain -= count;
1600         }
1601       }
1602     } else {
1603       nreqs = 0;
1604       if (mumps->is_omp_master) {
1605         val = mumps->val + mumps->recvcount[0];
1606         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1607           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1608           remain = mumps->recvcount[i] - count;
1609           while (count>0) {
1610             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1611             val    += count;
1612             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1613             remain -= count;
1614           }
1615         }
1616       } else {
1617         val    = mumps->val;
1618         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1619         remain = mumps->nnz - count;
1620         while (count>0) {
1621           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1622           val    += count;
1623           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1624           remain -= count;
1625         }
1626       }
1627     }
1628     PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE));
1629     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag */
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1635 {
1636   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1637   PetscBool      isMPIAIJ;
1638 
1639   PetscFunctionBegin;
1640   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* skip the check for an empty matrix, which MUMPS reports as INFOG(1)=-16 with INFO(2)=N=0 */
1641     if (mumps->id.INFOG(1) == -6) {
1642       PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1643     }
1644     PetscCall(PetscInfo(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1645     PetscFunctionReturn(0);
1646   }
1647 
1648   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1649   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1650 
1651   /* numerical factorization phase */
1652   /*-------------------------------*/
1653   mumps->id.job = JOB_FACTNUMERIC;
1654   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1655     if (!mumps->myid) {
1656       mumps->id.a = (MumpsScalar*)mumps->val;
1657     }
1658   } else {
1659     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1660   }
1661   PetscMUMPS_c(mumps);
1662   if (mumps->id.INFOG(1) < 0) {
1663     if (A->erroriffailure) {
1664       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1665     } else {
1666       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1667         PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1668         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1669       } else if (mumps->id.INFOG(1) == -13) {
1670         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1671         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1672       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1673         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with work array\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1674         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1675       } else {
1676         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1677         F->factorerrortype = MAT_FACTOR_OTHER;
1678       }
1679     }
1680   }
1681   PetscCheckFalse(!mumps->myid && mumps->id.ICNTL(16) > 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d",mumps->id.ICNTL(16));
1682 
1683   F->assembled    = PETSC_TRUE;
1684 
1685   if (F->schur) { /* reset Schur status to unfactored */
1686 #if defined(PETSC_HAVE_CUDA)
1687     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1688 #endif
1689     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1690       mumps->id.ICNTL(19) = 2;
1691       PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1692     }
1693     PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1694   }
1695 
1696   /* just to be sure that the ICNTL(19) value returned by MatMumpsGetIcntl is always consistent */
1697   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1698 
1699   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1700   if (mumps->petsc_size > 1) {
1701     PetscInt    lsol_loc;
1702     PetscScalar *sol_loc;
1703 
1704     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1705 
1706     /* distributed solution; Create x_seq=sol_loc for repeated use */
1707     if (mumps->x_seq) {
1708       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1709       PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
1710       PetscCall(VecDestroy(&mumps->x_seq));
1711     }
1712     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1713     PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc));
1714     mumps->id.lsol_loc = lsol_loc;
1715     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1716     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq));
1717   }
1718   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 /* Sets MUMPS options from the options database */
1723 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1724 {
1725   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1726   PetscMUMPSInt  icntl=0;
1727   PetscInt       info[80],i,ninfo=80;
1728   PetscBool      flg=PETSC_FALSE;
1729 
1730   PetscFunctionBegin;
1731   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");
1732   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg));
1733   if (flg) mumps->id.ICNTL(1) = icntl;
1734   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg));
1735   if (flg) mumps->id.ICNTL(2) = icntl;
1736   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg));
1737   if (flg) mumps->id.ICNTL(3) = icntl;
1738 
1739   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg));
1740   if (flg) mumps->id.ICNTL(4) = icntl;
1741   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1742 
1743   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg));
1744   if (flg) mumps->id.ICNTL(6) = icntl;
1745 
1746   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg));
1747   if (flg) {
1748     PetscCheckFalse(icntl == 1 || icntl < 0 || icntl > 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1749     mumps->id.ICNTL(7) = icntl;
1750   }
1751 
1752   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL));
1753   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1754   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL));
1755   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL));
1756   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL));
1757   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL));
1758   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL));
1759   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL));
1760   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1761     PetscCall(MatDestroy(&F->schur));
1762     PetscCall(MatMumpsResetSchur_Private(mumps));
1763   }
1764 
1765   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1766      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1767      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1768      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1769      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1770      In short, we could not use distributed RHS with MPICH until v4.0b1.
1771    */
1772 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1773   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1774 #else
1775   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1776 #endif
1777   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg));
1778   PetscCheckFalse(flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1779 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1780   PetscCheckFalse(flg && mumps->ICNTL20 == 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1781 #endif
1782   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1783 
1784   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL));
1785   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can be allocated per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL));
1786   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
1787   if (mumps->id.ICNTL(24)) {
1788     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1789   }
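  /* Illustrative command-line usage (values are application dependent): -mat_mumps_icntl_24 1 -mat_mumps_cntl_3 1e-12
     turns on null pivot row detection with CNTL(3) as the threshold used to decide whether a pivot is null */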
1790 
1791   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL));
1792   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL));
1793   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL));
1794   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL));
1795   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL));
1796   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1797   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL));
1798   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1799   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL));
1800   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL));
1801   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL));
1802   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL));
1803 
1804   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL));
1805   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL));
1806   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL));
1807   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL));
1808   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL));
1809   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL));
1810 
1811   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1812 
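  /* Illustrative usage: "-mat_mumps_view_info 15,16" requests the per-rank values of INFO(15) and INFO(16), which are then printed by MatView_MUMPS() */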
1813   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL));
1814   if (ninfo) {
1815     PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must be <= 80",ninfo);
1816     PetscCall(PetscMalloc1(ninfo,&mumps->info));
1817     mumps->ninfo = ninfo;
1818     for (i=0; i<ninfo; i++) {
1819       PetscCheckFalse(info[i] < 1 || info[i] > 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must be between 1 and 80",info[i]);
1820       mumps->info[i] = info[i];
1821     }
1822   }
1823 
1824   PetscOptionsEnd();
1825   PetscFunctionReturn(0);
1826 }
1827 
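/* One-time MUMPS initialization: set up the communicators (optionally using PETSc OpenMP support), call MUMPS with
   JOB_INIT, broadcast the resulting default ICNTL/CNTL values within omp_comm, and override a few defaults used by PETSc */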
1828 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1829 {
1830   PetscInt       nthreads=0;
1831 
1832   PetscFunctionBegin;
1833   mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1834   PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size));
1835   PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */
1836 
1837   PetscCall(PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support));
1838   if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1839   PetscCall(PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL));
1840   if (mumps->use_petsc_omp_support) {
1841 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1842     PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl));
1843     PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master));
1844 #else
1845     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
1846 #endif
1847   } else {
1848     mumps->omp_comm      = PETSC_COMM_SELF;
1849     mumps->mumps_comm    = mumps->petsc_comm;
1850     mumps->is_omp_master = PETSC_TRUE;
1851   }
1852   PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size));
1853   mumps->reqs = NULL;
1854   mumps->tag  = 0;
1855 
1856   /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1857   if (mumps->mumps_comm != MPI_COMM_NULL) {
1858     PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
1859   }
1860 
1861   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1862   mumps->id.job = JOB_INIT;
1863   mumps->id.par = 1;  /* host participates in factorization and solve */
1864   mumps->id.sym = mumps->sym;
1865 
1866   PetscMUMPS_c(mumps);
1867   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1868 
1869   /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1870      For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1871    */
1872   PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm));
1873   PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1874 
1875   mumps->scat_rhs = NULL;
1876   mumps->scat_sol = NULL;
1877 
1878   /* set PETSc-MUMPS default options - override MUMPS default */
1879   mumps->id.ICNTL(3) = 0;
1880   mumps->id.ICNTL(4) = 0;
1881   if (mumps->petsc_size == 1) {
1882     mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1883     mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1884   } else {
1885     mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1886     mumps->id.ICNTL(21) = 1;   /* distributed solution */
1887   }
1888 
1889   /* schur */
1890   mumps->id.size_schur    = 0;
1891   mumps->id.listvar_schur = NULL;
1892   mumps->id.schur         = NULL;
1893   mumps->sizeredrhs       = 0;
1894   mumps->schur_sol        = NULL;
1895   mumps->schur_sizesol    = 0;
1896   PetscFunctionReturn(0);
1897 }
1898 
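/* Translate a negative INFOG(1) from the MUMPS analysis phase into either a PETSc error (when A->erroriffailure is set)
   or a suitable F->factorerrortype that callers can query with MatFactorGetError() */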
1899 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1900 {
1901   PetscFunctionBegin;
1902   if (mumps->id.INFOG(1) < 0) {
1903     if (A->erroriffailure) {
1904       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1905     } else {
1906       if (mumps->id.INFOG(1) == -6) {
1907         PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1908         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1909       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1910         PetscCall(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1911         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1912       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFO(2)=N=0: empty matrix */
1913         PetscCall(PetscInfo(F,"Empty matrix\n"));
1914       } else {
1915         PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1916         F->factorerrortype = MAT_FACTOR_OTHER;
1917       }
1918     }
1919   }
1920   PetscFunctionReturn(0);
1921 }
1922 
1923 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1924 {
1925   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1926   Vec            b;
1927   const PetscInt M = A->rmap->N;
1928 
1929   PetscFunctionBegin;
1930   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1931     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1932     PetscFunctionReturn(0);
1933   }
1934 
1935   /* Set MUMPS options from the options database */
1936   PetscCall(PetscSetMUMPSFromOptions(F,A));
1937 
1938   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1939   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
1940 
1941   /* analysis phase */
1942   /*----------------*/
1943   mumps->id.job = JOB_FACTSYMBOLIC;
1944   mumps->id.n   = M;
1945   switch (mumps->id.ICNTL(18)) {
1946   case 0:  /* centralized assembled matrix input */
1947     if (!mumps->myid) {
1948       mumps->id.nnz = mumps->nnz;
1949       mumps->id.irn = mumps->irn;
1950       mumps->id.jcn = mumps->jcn;
1951       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1952       if (r) {
1953         mumps->id.ICNTL(7) = 1;
1954         if (!mumps->myid) {
1955           const PetscInt *idx;
1956           PetscInt       i;
1957 
1958           PetscCall(PetscMalloc1(M,&mumps->id.perm_in));
1959           PetscCall(ISGetIndices(r,&idx));
1960           for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
1961           PetscCall(ISRestoreIndices(r,&idx));
1962         }
1963       }
1964     }
1965     break;
1966   case 3:  /* distributed assembled matrix input (size>1) */
1967     mumps->id.nnz_loc = mumps->nnz;
1968     mumps->id.irn_loc = mumps->irn;
1969     mumps->id.jcn_loc = mumps->jcn;
1970     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1971     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1972       PetscCall(MatCreateVecs(A,NULL,&b));
1973       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
1974       PetscCall(VecDestroy(&b));
1975     }
1976     break;
1977   }
1978   PetscMUMPS_c(mumps);
1979   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
1980 
1981   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1982   F->ops->solve           = MatSolve_MUMPS;
1983   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
1984   F->ops->matsolve        = MatMatSolve_MUMPS;
1985   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1986 
1987   mumps->matstruc = SAME_NONZERO_PATTERN;
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 /* Note the Petsc r and c permutations are ignored */
1992 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1993 {
1994   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1995   Vec            b;
1996   const PetscInt M = A->rmap->N;
1997 
1998   PetscFunctionBegin;
1999   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2000     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2001     PetscFunctionReturn(0);
2002   }
2003 
2004   /* Set MUMPS options from the options database */
2005   PetscCall(PetscSetMUMPSFromOptions(F,A));
2006 
2007   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2008   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2009 
2010   /* analysis phase */
2011   /*----------------*/
2012   mumps->id.job = JOB_FACTSYMBOLIC;
2013   mumps->id.n   = M;
2014   switch (mumps->id.ICNTL(18)) {
2015   case 0:  /* centralized assembled matrix input */
2016     if (!mumps->myid) {
2017       mumps->id.nnz = mumps->nnz;
2018       mumps->id.irn = mumps->irn;
2019       mumps->id.jcn = mumps->jcn;
2020       if (mumps->id.ICNTL(6)>1) {
2021         mumps->id.a = (MumpsScalar*)mumps->val;
2022       }
2023     }
2024     break;
2025   case 3:  /* distributed assembled matrix input (size>1) */
2026     mumps->id.nnz_loc = mumps->nnz;
2027     mumps->id.irn_loc = mumps->irn;
2028     mumps->id.jcn_loc = mumps->jcn;
2029     if (mumps->id.ICNTL(6)>1) {
2030       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2031     }
2032     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2033       PetscCall(MatCreateVecs(A,NULL,&b));
2034       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2035       PetscCall(VecDestroy(&b));
2036     }
2037     break;
2038   }
2039   PetscMUMPS_c(mumps);
2040   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2041 
2042   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2043   F->ops->solve           = MatSolve_MUMPS;
2044   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2045 
2046   mumps->matstruc = SAME_NONZERO_PATTERN;
2047   PetscFunctionReturn(0);
2048 }
2049 
2050 /* Note the Petsc r permutation and factor info are ignored */
2051 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2052 {
2053   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2054   Vec            b;
2055   const PetscInt M = A->rmap->N;
2056 
2057   PetscFunctionBegin;
2058   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2059     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2060     PetscFunctionReturn(0);
2061   }
2062 
2063   /* Set MUMPS options from the options database */
2064   PetscCall(PetscSetMUMPSFromOptions(F,A));
2065 
2066   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2067   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2068 
2069   /* analysis phase */
2070   /*----------------*/
2071   mumps->id.job = JOB_FACTSYMBOLIC;
2072   mumps->id.n   = M;
2073   switch (mumps->id.ICNTL(18)) {
2074   case 0:  /* centralized assembled matrix input */
2075     if (!mumps->myid) {
2076       mumps->id.nnz = mumps->nnz;
2077       mumps->id.irn = mumps->irn;
2078       mumps->id.jcn = mumps->jcn;
2079       if (mumps->id.ICNTL(6)>1) {
2080         mumps->id.a = (MumpsScalar*)mumps->val;
2081       }
2082     }
2083     break;
2084   case 3:  /* distributed assembled matrix input (size>1) */
2085     mumps->id.nnz_loc = mumps->nnz;
2086     mumps->id.irn_loc = mumps->irn;
2087     mumps->id.jcn_loc = mumps->jcn;
2088     if (mumps->id.ICNTL(6)>1) {
2089       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2090     }
2091     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2092       PetscCall(MatCreateVecs(A,NULL,&b));
2093       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2094       PetscCall(VecDestroy(&b));
2095     }
2096     break;
2097   }
2098   PetscMUMPS_c(mumps);
2099   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2100 
2101   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2102   F->ops->solve                 = MatSolve_MUMPS;
2103   F->ops->solvetranspose        = MatSolve_MUMPS;
2104   F->ops->matsolve              = MatMatSolve_MUMPS;
2105   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2106 #if defined(PETSC_USE_COMPLEX)
2107   F->ops->getinertia = NULL;
2108 #else
2109   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2110 #endif
2111 
2112   mumps->matstruc = SAME_NONZERO_PATTERN;
2113   PetscFunctionReturn(0);
2114 }
2115 
2116 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2117 {
2118   PetscBool         iascii;
2119   PetscViewerFormat format;
2120   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2121 
2122   PetscFunctionBegin;
2123   /* check if matrix is mumps type */
2124   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2125 
2126   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
2127   if (iascii) {
2128     PetscCall(PetscViewerGetFormat(viewer,&format));
2129     if (format == PETSC_VIEWER_ASCII_INFO) {
2130       PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n"));
2131       PetscCall(PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym));
2132       PetscCall(PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par));
2133       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1)));
2134       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2)));
2135       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3)));
2136       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4)));
2137       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5)));
2138       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6)));
2139       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7)));
2140       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8)));
2141       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10)));
2142       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11)));
2143       if (mumps->id.ICNTL(11)>0) {
2144         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4)));
2145         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5)));
2146         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6)));
2147         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8)));
2148         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9)));
2149         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11)));
2150       }
2151       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12)));
2152       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13)));
2153       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14)));
2154       /* ICNTL(15-17) not used */
2155       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18)));
2156       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19)));
2157       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20)));
2158       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21)));
2159       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22)));
2160       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory that can be allocated locally):%d\n",mumps->id.ICNTL(23)));
2161 
2162       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24)));
2163       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25)));
2164       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26)));
2165       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27)));
2166       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28)));
2167       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29)));
2168 
2169       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30)));
2170       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors are discarded in the solve phase):    %d\n",mumps->id.ICNTL(31)));
2171       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33)));
2172       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35)));
2173       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36)));
2174       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38)));
2175 
2176       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1)));
2177       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2)));
2178       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3)));
2179       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4)));
2180       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5)));
2181       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7)));
2182 
2183       /* information local to each processor */
2184       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n"));
2185       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2186       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1)));
2187       PetscCall(PetscViewerFlush(viewer));
2188       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n"));
2189       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2)));
2190       PetscCall(PetscViewerFlush(viewer));
2191       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n"));
2192       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3)));
2193       PetscCall(PetscViewerFlush(viewer));
2194 
2195       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n"));
2196       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d\n",mumps->myid,mumps->id.INFO(15)));
2197       PetscCall(PetscViewerFlush(viewer));
2198 
2199       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n"));
2200       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16)));
2201       PetscCall(PetscViewerFlush(viewer));
2202 
2203       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n"));
2204       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23)));
2205       PetscCall(PetscViewerFlush(viewer));
2206 
2207       if (mumps->ninfo && mumps->ninfo <= 80) {
2208         PetscInt i;
2209         for (i=0; i<mumps->ninfo; i++) {
2210           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]));
2211           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i])));
2212           PetscCall(PetscViewerFlush(viewer));
2213         }
2214       }
2215       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2216 
2217       if (!mumps->myid) { /* information from the host */
2218         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1)));
2219         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2)));
2220         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3)));
2221         PetscCall(PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34)));
2222 
2223         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3)));
2224         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4)));
2225         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5)));
2226         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6)));
2227         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7)));
2228         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8)));
2229         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9)));
2230         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n",mumps->id.INFOG(10)));
2231         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11)));
2232         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12)));
2233         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13)));
2234         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d\n",mumps->id.INFOG(14)));
2235         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15)));
2236         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16)));
2237         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17)));
2238         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18)));
2239         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19)));
2240         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20)));
2241         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21)));
2242         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22)));
2243         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23)));
2244         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24)));
2245         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25)));
2246         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28)));
2247         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29)));
2248         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31)));
2249         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32)));
2250         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33)));
2251         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34)));
2252         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35)));
2253         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36)));
2254         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37)));
2255         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38)));
2256         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39)));
2257       }
2258     }
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2264 {
2265   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2266 
2267   PetscFunctionBegin;
2268   info->block_size        = 1.0;
2269   info->nz_allocated      = mumps->id.INFOG(20);
2270   info->nz_used           = mumps->id.INFOG(20);
2271   info->nz_unneeded       = 0.0;
2272   info->assemblies        = 0.0;
2273   info->mallocs           = 0.0;
2274   info->memory            = 0.0;
2275   info->fill_ratio_given  = 0;
2276   info->fill_ratio_needed = 0;
2277   info->factor_mallocs    = 0;
2278   PetscFunctionReturn(0);
2279 }
2280 
2281 /* -------------------------------------------------------------------------------------------*/
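/* Define the Schur complement block from an index set: allocate the sequential dense matrix F->schur to be filled by
   MUMPS, store the 1-based (Fortran) indices in id.listvar_schur, and select how MUMPS returns the Schur complement */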
2282 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2283 {
2284   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2285   const PetscScalar *arr;
2286   const PetscInt    *idxs;
2287   PetscInt          size,i;
2288 
2289   PetscFunctionBegin;
2290   PetscCall(ISGetLocalSize(is,&size));
2291   if (mumps->petsc_size > 1) {
2292     PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2293 
2294     ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2295     PetscCallMPI(MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm));
2296     PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
2297   }
2298 
2299   /* Schur complement matrix */
2300   PetscCall(MatDestroy(&F->schur));
2301   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur));
2302   PetscCall(MatDenseGetArrayRead(F->schur,&arr));
2303   mumps->id.schur      = (MumpsScalar*)arr;
2304   mumps->id.size_schur = size;
2305   mumps->id.schur_lld  = size;
2306   PetscCall(MatDenseRestoreArrayRead(F->schur,&arr));
2307   if (mumps->sym == 1) {
2308     PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE));
2309   }
2310 
2311   /* MUMPS expects Fortran style indices */
2312   PetscCall(PetscFree(mumps->id.listvar_schur));
2313   PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur));
2314   PetscCall(ISGetIndices(is,&idxs));
2315   for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i])));
2316   PetscCall(ISRestoreIndices(is,&idxs));
2317   if (mumps->petsc_size > 1) {
2318     mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2319   } else {
2320     if (F->factortype == MAT_FACTOR_LU) {
2321       mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2322     } else {
2323       mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2324     }
2325   }
2326   /* set a special value of ICNTL(26) (not handled by MUMPS) to be used in the solve phase by PETSc */
2327   mumps->id.ICNTL(26) = -1;
2328   PetscFunctionReturn(0);
2329 }
2330 
2331 /* -------------------------------------------------------------------------------------------*/
2332 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2333 {
2334   Mat            St;
2335   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2336   PetscScalar    *array;
2337 #if defined(PETSC_USE_COMPLEX)
2338   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2339 #endif
2340 
2341   PetscFunctionBegin;
2342   PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2343   PetscCall(MatCreate(PETSC_COMM_SELF,&St));
2344   PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur));
2345   PetscCall(MatSetType(St,MATDENSE));
2346   PetscCall(MatSetUp(St));
2347   PetscCall(MatDenseGetArray(St,&array));
2348   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2349     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2350       PetscInt i,j,N=mumps->id.size_schur;
2351       for (i=0;i<N;i++) {
2352         for (j=0;j<N;j++) {
2353 #if !defined(PETSC_USE_COMPLEX)
2354           PetscScalar val = mumps->id.schur[i*N+j];
2355 #else
2356           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2357 #endif
2358           array[j*N+i] = val;
2359         }
2360       }
2361     } else { /* stored by columns */
2362       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2363     }
2364   } else { /* either full or lower-triangular (not packed) */
2365     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2366       PetscInt i,j,N=mumps->id.size_schur;
2367       for (i=0;i<N;i++) {
2368         for (j=i;j<N;j++) {
2369 #if !defined(PETSC_USE_COMPLEX)
2370           PetscScalar val = mumps->id.schur[i*N+j];
2371 #else
2372           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2373 #endif
2374           array[i*N+j] = val;
2375           array[j*N+i] = val;
2376         }
2377       }
2378     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2379       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2380     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2381       PetscInt i,j,N=mumps->id.size_schur;
2382       for (i=0;i<N;i++) {
2383         for (j=0;j<i+1;j++) {
2384 #if !defined(PETSC_USE_COMPLEX)
2385           PetscScalar val = mumps->id.schur[i*N+j];
2386 #else
2387           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2388 #endif
2389           array[i*N+j] = val;
2390           array[j*N+i] = val;
2391         }
2392       }
2393     }
2394   }
2395   PetscCall(MatDenseRestoreArray(St,&array));
2396   *S   = St;
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 /* -------------------------------------------------------------------------------------------*/
2401 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2402 {
2403   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2404 
2405   PetscFunctionBegin;
2406   PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl)));
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2411 {
2412   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2413 
2414   PetscFunctionBegin;
2415   *ival = mumps->id.ICNTL(icntl);
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 /*@
2420   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2421 
2422    Logically Collective on Mat
2423 
2424    Input Parameters:
2425 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2426 .  icntl - index of MUMPS parameter array ICNTL()
2427 -  ival - value of MUMPS ICNTL(icntl)
2428 
2429   Options Database:
2430 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2431 
2432    Level: beginner
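
   Example Usage (a minimal sketch; A is assumed to be an existing assembled matrix, and the choice of ICNTL(14), the percentage increase
   in the estimated working space, with the value 30 is purely illustrative):
$     Mat F;
$     PetscCall(MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F));
$     PetscCall(MatMumpsSetIcntl(F,14,30));
   Symbolic and numeric factorization then proceed as usual (e.g. MatLUFactorSymbolic()/MatLUFactorNumeric()); the same option can equivalently
   be passed as -mat_mumps_icntl_14 30 when the factorization is selected through a PC.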
2433 
2434    References:
2435 .  * - MUMPS Users' Guide
2436 
2437 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2438 @*/
2439 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2440 {
2441   PetscFunctionBegin;
2442   PetscValidType(F,1);
2443   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2444   PetscValidLogicalCollectiveInt(F,icntl,2);
2445   PetscValidLogicalCollectiveInt(F,ival,3);
2446   PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2447   PetscFunctionReturn(0);
2448 }
2449 
2450 /*@
2451   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2452 
2453    Logically Collective on Mat
2454 
2455    Input Parameters:
2456 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2457 -  icntl - index of MUMPS parameter array ICNTL()
2458 
2459   Output Parameter:
2460 .  ival - value of MUMPS ICNTL(icntl)
2461 
2462    Level: beginner
2463 
2464    References:
2465 .  * - MUMPS Users' Guide
2466 
2467 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2468 @*/
2469 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2470 {
2471   PetscFunctionBegin;
2472   PetscValidType(F,1);
2473   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2474   PetscValidLogicalCollectiveInt(F,icntl,2);
2475   PetscValidIntPointer(ival,3);
2476   PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 /* -------------------------------------------------------------------------------------------*/
2481 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2482 {
2483   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2484 
2485   PetscFunctionBegin;
2486   mumps->id.CNTL(icntl) = val;
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2491 {
2492   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2493 
2494   PetscFunctionBegin;
2495   *val = mumps->id.CNTL(icntl);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 /*@
2500   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2501 
2502    Logically Collective on Mat
2503 
2504    Input Parameters:
2505 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2506 .  icntl - index of MUMPS parameter array CNTL()
2507 -  val - value of MUMPS CNTL(icntl)
2508 
2509   Options Database:
2510 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2511 
2512    Level: beginner
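
   Example Usage (a minimal sketch; A is assumed to be an existing assembled matrix, and the value 0.01 for CNTL(1), the relative pivoting
   threshold, is illustrative only):
$     Mat F;
$     PetscCall(MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F));
$     PetscCall(MatMumpsSetCntl(F,1,0.01));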
2513 
2514    References:
2515 .  * - MUMPS Users' Guide
2516 
2517 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2518 @*/
2519 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2520 {
2521   PetscFunctionBegin;
2522   PetscValidType(F,1);
2523   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2524   PetscValidLogicalCollectiveInt(F,icntl,2);
2525   PetscValidLogicalCollectiveReal(F,val,3);
2526   PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 /*@
2531   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2532 
2533    Logically Collective on Mat
2534 
2535    Input Parameters:
2536 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2537 -  icntl - index of MUMPS parameter array CNTL()
2538 
2539   Output Parameter:
2540 .  val - value of MUMPS CNTL(icntl)
2541 
2542    Level: beginner
2543 
2544    References:
2545 .  * - MUMPS Users' Guide
2546 
2547 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2548 @*/
2549 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2550 {
2551   PetscFunctionBegin;
2552   PetscValidType(F,1);
2553   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2554   PetscValidLogicalCollectiveInt(F,icntl,2);
2555   PetscValidRealPointer(val,3);
2556   PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2561 {
2562   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2563 
2564   PetscFunctionBegin;
2565   *info = mumps->id.INFO(icntl);
2566   PetscFunctionReturn(0);
2567 }
2568 
2569 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2570 {
2571   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2572 
2573   PetscFunctionBegin;
2574   *infog = mumps->id.INFOG(icntl);
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2579 {
2580   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2581 
2582   PetscFunctionBegin;
2583   *rinfo = mumps->id.RINFO(icntl);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2588 {
2589   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2590 
2591   PetscFunctionBegin;
2592   *rinfog = mumps->id.RINFOG(icntl);
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2597 {
2598   Mat            Bt = NULL,Btseq = NULL;
2599   PetscBool      flg;
2600   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2601   PetscScalar    *aa;
2602   PetscInt       spnr,*ia,*ja,M,nrhs;
2603 
2604   PetscFunctionBegin;
2605   PetscValidPointer(spRHS,2);
2606   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg));
2607   if (flg) {
2608     PetscCall(MatTransposeGetMat(spRHS,&Bt));
2609   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2610 
2611   PetscCall(MatMumpsSetIcntl(F,30,1));
2612 
2613   if (mumps->petsc_size > 1) {
2614     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2615     Btseq = b->A;
2616   } else {
2617     Btseq = Bt;
2618   }
2619 
2620   PetscCall(MatGetSize(spRHS,&M,&nrhs));
2621   mumps->id.nrhs = nrhs;
2622   mumps->id.lrhs = M;
2623   mumps->id.rhs  = NULL;
2624 
2625   if (!mumps->myid) {
2626     PetscCall(MatSeqAIJGetArray(Btseq,&aa));
2627     PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2628     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2629     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
2630     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2631   } else {
2632     mumps->id.irhs_ptr    = NULL;
2633     mumps->id.irhs_sparse = NULL;
2634     mumps->id.nz_rhs      = 0;
2635     mumps->id.rhs_sparse  = NULL;
2636   }
2637   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2638   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2639 
2640   /* solve phase */
2641   /*-------------*/
2642   mumps->id.job = JOB_SOLVE;
2643   PetscMUMPS_c(mumps);
2644   if (mumps->id.INFOG(1) < 0)
2645     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2646 
2647   if (!mumps->myid) {
2648     PetscCall(MatSeqAIJRestoreArray(Btseq,&aa));
2649     PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2650     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2651   }
2652   PetscFunctionReturn(0);
2653 }
2654 
2655 /*@
2656   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2657 
2658    Logically Collective on Mat
2659 
2660    Input Parameters:
2661 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2662 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the specified indices on process 0
2663 
2664   Output Parameter:
2665 . spRHS - requested entries of inverse of A
2666 
2667    Level: beginner
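
   Example Usage (a minimal sketch; here spRHST is assumed to be an AIJ matrix whose nonzero pattern, centralized on rank 0, marks which
   entries of the inverse are requested; on return those locations hold the computed entries):
$     Mat spRHS;
$     PetscCall(MatCreateTranspose(spRHST,&spRHS)); /* wrap as MATTRANSPOSEMAT, as required by this routine */
$     PetscCall(MatMumpsGetInverse(F,spRHS));
$     PetscCall(MatDestroy(&spRHS));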
2668 
2669    References:
2670 .  * - MUMPS Users' Guide
2671 
2672 .seealso: MatGetFactor(), MatCreateTranspose()
2673 @*/
2674 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2675 {
2676   PetscFunctionBegin;
2677   PetscValidType(F,1);
2678   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2679   PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2680   PetscFunctionReturn(0);
2681 }
2682 
2683 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2684 {
2685   Mat            spRHS;
2686 
2687   PetscFunctionBegin;
2688   PetscCall(MatCreateTranspose(spRHST,&spRHS));
2689   PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS));
2690   PetscCall(MatDestroy(&spRHS));
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 /*@
2695   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2696 
2697    Logically Collective on Mat
2698 
2699    Input Parameters:
2700 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2701 -  spRHST - sequential sparse matrix in MATAIJ format holding the specified indices of A^T on process 0
2702 
2703   Output Parameter:
2704 . spRHST - requested entries of inverse of A^T
2705 
2706    Level: beginner
2707 
2708    References:
2709 .  * - MUMPS Users' Guide
2710 
2711 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2712 @*/
2713 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2714 {
2715   PetscBool      flg;
2716 
2717   PetscFunctionBegin;
2718   PetscValidType(F,1);
2719   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2720   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
2721   PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix");
2722 
2723   PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2724   PetscFunctionReturn(0);
2725 }
2726 
2727 /*@
2728   MatMumpsGetInfo - Get MUMPS parameter INFO()
2729 
2730    Logically Collective on Mat
2731 
2732    Input Parameters:
2733 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2734 -  icntl - index of MUMPS parameter array INFO()
2735 
2736   Output Parameter:
2737 .  ival - value of MUMPS INFO(icntl)
2738 
2739    Level: beginner
2740 
2741    References:
2742 .  * - MUMPS Users' Guide
2743 
2744 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2745 @*/
2746 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2747 {
2748   PetscFunctionBegin;
2749   PetscValidType(F,1);
2750   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2751   PetscValidIntPointer(ival,3);
2752   PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2753   PetscFunctionReturn(0);
2754 }
2755 
2756 /*@
2757   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2758 
2759    Logically Collective on Mat
2760 
2761    Input Parameters:
2762 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2763 -  icntl - index of MUMPS parameter array INFOG()
2764 
2765   Output Parameter:
2766 .  ival - value of MUMPS INFOG(icntl)
2767 
2768    Level: beginner
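
   Example Usage (a minimal sketch, assuming F already carries a completed MUMPS factorization; the index 28 is illustrative, INFOG(28)
   being the number of null pivots encountered during factorization):
$     PetscInt npiv;
$     PetscCall(MatMumpsGetInfog(F,28,&npiv));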
2769 
2770    References:
2771 .  * - MUMPS Users' Guide
2772 
2773 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2774 @*/
2775 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2776 {
2777   PetscFunctionBegin;
2778   PetscValidType(F,1);
2779   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2780   PetscValidIntPointer(ival,3);
2781   PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2782   PetscFunctionReturn(0);
2783 }
2784 
2785 /*@
2786   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2787 
2788    Logically Collective on Mat
2789 
2790    Input Parameters:
2791 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2792 -  icntl - index of MUMPS parameter array RINFO()
2793 
2794   Output Parameter:
2795 .  val - value of MUMPS RINFO(icntl)
2796 
2797    Level: beginner
2798 
2799    References:
2800 .  * - MUMPS Users' Guide
2801 
2802 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2803 @*/
2804 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2805 {
2806   PetscFunctionBegin;
2807   PetscValidType(F,1);
2808   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2809   PetscValidRealPointer(val,3);
2810   PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2811   PetscFunctionReturn(0);
2812 }
2813 
2814 /*@
2815   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2816 
2817    Logically Collective on Mat
2818 
2819    Input Parameters:
2820 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2821 -  icntl - index of MUMPS parameter array RINFOG()
2822 
2823   Output Parameter:
2824 .  val - value of MUMPS RINFOG(icntl)
2825 
2826    Level: beginner
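
   Example Usage (a sketch of querying the determinant, assuming -mat_mumps_icntl_33 1 (or MatMumpsSetIcntl(F,33,1)) was set before the
   factorization; per the MUMPS Users' Guide the mantissa is then returned in RINFOG(12) (and RINFOG(13) for the imaginary part) while the
   exponent is in INFOG(34)):
$     PetscReal mantissa;
$     PetscInt  exponent;
$     PetscCall(MatMumpsGetRinfog(F,12,&mantissa));
$     PetscCall(MatMumpsGetInfog(F,34,&exponent));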
2827 
2828    References:
2829 .  * - MUMPS Users' Guide
2830 
2831 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2832 @*/
2833 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2834 {
2835   PetscFunctionBegin;
2836   PetscValidType(F,1);
2837   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2838   PetscValidRealPointer(val,3);
2839   PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2840   PetscFunctionReturn(0);
2841 }
2842 
2843 /*MC
2844   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2845   distributed and sequential matrices via the external package MUMPS.
2846 
2847   Works with MATAIJ and MATSBAIJ matrices
2848 
2849   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2850 
2851   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode while the non-MUMPS part of the code runs in flat-MPI mode. See details below.
2852 
2853   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2854 
2855   Options Database Keys:
2856 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2857 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2858 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2859 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2860 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2861 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2862                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2863 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2864 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2865 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2866 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2867 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2868 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2869 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2870 .  -mat_mumps_icntl_20  - ICNTL(20): gives MUMPS a centralized (0) or distributed (10) dense RHS
2871 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2872 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2873 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2874 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2875 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix is involved
2876 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2877 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2878 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2879 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2880 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2881 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2882 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2883 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2884 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2885 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2886 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2887 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2888 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2889 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2890 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2891                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
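
   For example, a command line enabling MUMPS LU with extra workspace and diagnostic output could look like the following sketch (./ex2 stands
   for any PETSc program that solves a linear system with KSP; the specific ICNTL values are illustrative only):
$     mpirun -n 4 ./ex2 -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 50 -mat_mumps_icntl_4 2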
2892 
2893   Level: beginner
2894 
2895     Notes:
2896     MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix will generate an error.
2897 
2898     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2899 $          KSPGetPC(ksp,&pc);
2900 $          PCFactorGetMatrix(pc,&mat);
2901 $          MatMumpsGetInfo(mat,....);
2902 $          MatMumpsGetInfog(mat,....); etc.
2903            Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2904 
2905   Using MUMPS with 64-bit integers
2906     MUMPS provides 64-bit integer support in two build modes:
2907       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2908       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2909 
2910       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2911       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2912       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2913       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2914 
2915     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2916 
2917   Two modes to run MUMPS/PETSc with OpenMP
2918 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2919 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2920 
2921 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2922 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2923 
2924    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2925    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2926    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2927    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2928    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2929 
2930    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2931    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2932    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2933    are called slave ranks (or slaves). Only master ranks are seen by MUMPS; slaves are not. We will free CPUs assigned to slaves (which might be set
2934    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2935    In a multi-socket compute node, MPI rank mapping is an issue. To continue the above example, suppose your compute node has two sockets and
2936    you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, with MPI ranks bound
2937    to cores. Then with -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half of the cores in socket 0 and half of
2938    the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
2939    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2940    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2941    examine the mapping result.
2942 
2943    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2944    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2945    calls omp_set_num_threads(m) internally before calling MUMPS.
2946 
2947    References:
2948 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2949 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2950 
2951 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()
2952 
2953 M*/
2954 
2955 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2956 {
2957   PetscFunctionBegin;
2958   *type = MATSOLVERMUMPS;
2959   PetscFunctionReturn(0);
2960 }
2961 
2962 /* MatGetFactor for Seq and MPI AIJ matrices */
2963 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2964 {
2965   Mat            B;
2966   Mat_MUMPS      *mumps;
2967   PetscBool      isSeqAIJ;
2968   PetscMPIInt    size;
2969 
2970   PetscFunctionBegin;
2971  #if defined(PETSC_USE_COMPLEX)
2972   PetscCheckFalse(A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
2973  #endif
2974   /* Create the factorization matrix */
2975   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ));
2976   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
2977   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2978   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
2979   PetscCall(MatSetUp(B));
2980 
2981   PetscCall(PetscNewLog(B,&mumps));
2982 
2983   B->ops->view    = MatView_MUMPS;
2984   B->ops->getinfo = MatGetInfo_MUMPS;
2985 
2986   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
2987   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
2988   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
2989   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
2990   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
2991   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
2992   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
2993   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
2994   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
2995   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
2996   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
2997   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
2998   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
2999 
3000   if (ftype == MAT_FACTOR_LU) {
3001     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3002     B->factortype            = MAT_FACTOR_LU;
3003     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3004     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3005     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3006     mumps->sym = 0;
3007   } else {
3008     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3009     B->factortype                  = MAT_FACTOR_CHOLESKY;
3010     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3011     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3012     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3013 #if defined(PETSC_USE_COMPLEX)
3014     mumps->sym = 2;
3015 #else
3016     if (A->spd_set && A->spd) mumps->sym = 1;
3017     else                      mumps->sym = 2;
3018 #endif
3019   }
3020 
3021   /* set solvertype */
3022   PetscCall(PetscFree(B->solvertype));
3023   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3024   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3025   if (size == 1) {
3026     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3027     B->canuseordering = PETSC_TRUE;
3028   }
3029   B->ops->destroy = MatDestroy_MUMPS;
3030   B->data         = (void*)mumps;
3031 
3032   PetscCall(PetscInitializeMUMPS(A,mumps));
3033 
3034   *F = B;
3035   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3036   PetscFunctionReturn(0);
3037 }
3038 
3039 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3040 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3041 {
3042   Mat            B;
3043   Mat_MUMPS      *mumps;
3044   PetscBool      isSeqSBAIJ;
3045   PetscMPIInt    size;
3046 
3047   PetscFunctionBegin;
3048  #if defined(PETSC_USE_COMPLEX)
3049   PetscCheck(!A->hermitian || A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3050  #endif
3051   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3052   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3053   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3054   PetscCall(MatSetUp(B));
3055 
3056   PetscCall(PetscNewLog(B,&mumps));
3057   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ));
3058   if (isSeqSBAIJ) {
3059     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3060   } else {
3061     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3062   }
3063 
3064   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3065   B->ops->view                   = MatView_MUMPS;
3066   B->ops->getinfo                = MatGetInfo_MUMPS;
3067 
3068   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3069   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3070   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3071   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3072   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3073   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3074   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3075   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3076   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3077   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3078   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3079   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3080   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3081 
3082   B->factortype = MAT_FACTOR_CHOLESKY;
3083 #if defined(PETSC_USE_COMPLEX)
3084   mumps->sym = 2;
3085 #else
3086   if (A->spd_set && A->spd) mumps->sym = 1;
3087   else                      mumps->sym = 2;
3088 #endif
3089 
3090   /* set solvertype */
3091   PetscCall(PetscFree(B->solvertype));
3092   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3093   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3094   if (size == 1) {
3095     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3096     B->canuseordering = PETSC_TRUE;
3097   }
3098   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3099   B->ops->destroy = MatDestroy_MUMPS;
3100   B->data         = (void*)mumps;
3101 
3102   PetscCall(PetscInitializeMUMPS(A,mumps));
3103 
3104   *F = B;
3105   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3106   PetscFunctionReturn(0);
3107 }
3108 
3109 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3110 {
3111   Mat            B;
3112   Mat_MUMPS      *mumps;
3113   PetscBool      isSeqBAIJ;
3114   PetscMPIInt    size;
3115 
3116   PetscFunctionBegin;
3117   /* Create the factorization matrix */
3118   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ));
3119   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3120   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3121   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3122   PetscCall(MatSetUp(B));
3123 
3124   PetscCall(PetscNewLog(B,&mumps));
3125   if (ftype == MAT_FACTOR_LU) {
3126     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3127     B->factortype            = MAT_FACTOR_LU;
3128     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3129     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3130     mumps->sym = 0;
3131     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3132   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3133 
3134   B->ops->view        = MatView_MUMPS;
3135   B->ops->getinfo     = MatGetInfo_MUMPS;
3136 
3137   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3138   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3139   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3140   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3141   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3142   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3143   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3144   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3145   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3146   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3147   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3148   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3149   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3150 
3151   /* set solvertype */
3152   PetscCall(PetscFree(B->solvertype));
3153   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3154   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3155   if (size == 1) {
3156     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3157     B->canuseordering = PETSC_TRUE;
3158   }
3159   B->ops->destroy = MatDestroy_MUMPS;
3160   B->data         = (void*)mumps;
3161 
3162   PetscCall(PetscInitializeMUMPS(A,mumps));
3163 
3164   *F = B;
3165   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 /* MatGetFactor for Seq and MPI SELL matrices */
3170 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3171 {
3172   Mat            B;
3173   Mat_MUMPS      *mumps;
3174   PetscBool      isSeqSELL;
3175   PetscMPIInt    size;
3176 
3177   PetscFunctionBegin;
3178   /* Create the factorization matrix */
3179   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL));
3180   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3181   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3182   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3183   PetscCall(MatSetUp(B));
3184 
3185   PetscCall(PetscNewLog(B,&mumps));
3186 
3187   B->ops->view        = MatView_MUMPS;
3188   B->ops->getinfo     = MatGetInfo_MUMPS;
3189 
3190   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3191   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3192   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3193   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3194   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3195   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3196   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3197   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3198   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3201 
3202   if (ftype == MAT_FACTOR_LU) {
3203     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3204     B->factortype            = MAT_FACTOR_LU;
3205     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3206     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3207     mumps->sym = 0;
3208     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3209   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3210 
3211   /* set solvertype */
3212   PetscCall(PetscFree(B->solvertype));
3213   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3214   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3215   if (size == 1) {
3216     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3217     B->canuseordering = PETSC_TRUE;
3218   }
3219   B->ops->destroy = MatDestroy_MUMPS;
3220   B->data         = (void*)mumps;
3221 
3222   PetscCall(PetscInitializeMUMPS(A,mumps));
3223 
3224   *F = B;
3225   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3226   PetscFunctionReturn(0);
3227 }
3228 
3229 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3230 {
3231   PetscFunctionBegin;
3232   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3233   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3234   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3235   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3236   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3237   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3238   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3239   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3240   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3241   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3242   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps));
3243   PetscFunctionReturn(0);
3244 }
3245