
/*
    Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#include <cmumps_c.h>
#else
#include <zmumps_c.h>
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#include <smumps_c.h>
#else
#include <dmumps_c.h>
#endif
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC 2
#define JOB_SOLVE 3
#define JOB_END -2

/* calls to MUMPS */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c cmumps_c
#else
#define MUMPS_c zmumps_c
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
#define MUMPS_c smumps_c
#else
#define MUMPS_c dmumps_c
#endif
#endif

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt, etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers if MUMPS is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT             MPI_INT
#define PETSC_MUMPS_INT_MAX       2147483647
#define PETSC_MUMPS_INT_MIN       (-2147483648)

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/column indices or other small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}
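
/* Usage sketch (illustrative only, not part of the build): convert a PetscInt obtained from
   PETSc before handing it to MUMPS; the cast errors out instead of silently truncating.

     PetscInt      nrow;               // e.g. obtained from MatGetLocalSize()
     PetscMUMPSInt nrow_mumps;
     PetscCall(PetscMUMPSIntCast(nrow,&nrow_mumps));
     mumps->id.n = nrow_mumps;         // id.n is MUMPS's matrix order
*/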

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should always be >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval,value));
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
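
/* Hypothetical usage sketch: inside a PetscOptionsBegin/End block (where PetscOptionsObject is
   in scope), an ICNTL control can be read directly into the MUMPS struct, e.g.

     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering","None",
                                    mumps->id.ICNTL(7),&mumps->id.ICNTL(7),NULL));
*/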

/* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
        MUMPS_c(&mumps->id); \
        PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
      } \
      PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
      /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is      \
         process-specific, so we only Bcast info[1], an error code, and leave the others (they do not \
         have an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual, p82.  \
         omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
      */ \
      PetscCallMPI(MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm));\
      PetscCallMPI(MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm));\
      PetscCallMPI(MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm));\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif

/* declare MumpsScalar */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros so that indices match the MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
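
/* For example, mumps->id.ICNTL(7) expands to mumps->id.icntl[6], so the code can use the 1-based
   control-parameter numbers from the MUMPS manual while C keeps its 0-based arrays. */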

typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;
  PetscMPIInt    myid,petsc_size;
  PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps */
  PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer, hence val_alloc. */
  PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;
  PetscInt       ninfo,*info;           /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar    *schur_sol;
  PetscInt       schur_sizesol;
  PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;     /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);

  /* stuff used by petsc/mumps OpenMP support */
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller with which blocked processes release their CPUs (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is the petsc matrix's comm */
  PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;         /* is this rank the master of omp_comm? */
  MPI_Request    *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow+1,&mumps->ia_alloc));
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz,&mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i=0; i<nrow+1; i++) PetscCall(PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i])));
    for (i=0; i<nnz; i++)    PetscCall(PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i])));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz,nnz_mumps));
  PetscFunctionReturn(0);
}
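
/* Usage sketch (mirrors the sparse-RHS calls later in this file): given a 1-based CSR
   (spnr, ia, ja) from MatGetRowIJ(), this either casts into the work arrays (64-bit PetscInt)
   or aliases the input arrays (32-bit PetscInt):

     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
*/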

static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->id.redrhs));
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  mumps->id.ICNTL(19)  = 0;
  PetscFunctionReturn(0);
}

/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F,&S,&schurstatus));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B));
  PetscCall(MatSetType(B,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B,S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S,B,X));
    } else {
      PetscCall(MatMatSolve(S,B,X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol,&mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X));
    PetscCall(MatSetType(X,((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X,S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S,B,NULL,X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X,MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X,MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X,B,SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
  }
  PetscCall(MatFactorRestoreSchurComplement(F,&S,schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = mumps->id.size_schur;
      PetscCall(PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs));
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve the Schur complement (this has to be done by the MUMPS user, so basically us) */
    PetscCall(MatMumpsSolveSchur_Private(F));
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      PetscCall(PetscFree(mumps->id.redrhs));
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
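
/* In the routine above, ICNTL(26)=1 makes the MUMPS solve only condense the right-hand side onto
   the Schur variables (stored in redrhs); after we solve the Schur system ourselves in
   MatMumpsSolveSchur_Private(), ICNTL(26)=2 expands that reduced solution back to the full
   system, and -1 restores the default of solving the full system. */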

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A       - matrix in aij, baij or sbaij format
    shift   - 0: C style output triple; 1: Fortran style output triple.
    reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
              MAT_REUSE_MATRIX:   only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(), and in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal code; the fact that v is only sometimes part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().

 */

PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j,k;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  mumps->val = (PetscScalar*)av;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    ai   = aa->i;
    aj   = aa->j;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<M; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[k]));
        k++;
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}
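
/* Worked example (illustrative): with shift=1, the 2x2 SeqAIJ matrix
     [1 2]
     [0 3]
   has ai = {0,2,3}, aj = {0,1,1}, av = {1,2,3} and is converted to the Fortran-style triples
   irn = {1,1,2}, jcn = {1,2,2}, val = {1,2,3} that MUMPS expects. */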

PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices];
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=k=0; i<a->totalslices; i++) { /* loop over the slices, each of which holds 8 rows */
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        PetscCall(PetscMUMPSIntCast(8*i+r+shift,&row[k++]));
      }
    }
    for (i=0;i<nz;i++) PetscCall(PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  M          = A->rmap->N/bs;
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[idx]));
            PetscCall(PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj,*ajj;
  PetscInt        bs;
  PetscInt64      nz,rnz,i,j,k,m;
  PetscMUMPSInt   *row,*col;
  PetscScalar     *val;
  Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
  const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool       hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian matrices for Cholesky");
#endif
  ai   = aa->i;
  aj   = aa->j;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = aa->nz;
    PetscCall(PetscMalloc2(bs2*nz,&row,bs2*nz,&col));
    if (bs>1) {
      PetscCall(PetscMalloc1(bs2*nz,&mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    if (bs == 1) mumps->val = aa->a;
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs>1) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (ajj[j]>i || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i*bs + m + shift,&row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]));
              }
              val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i=0; i<mbs; i++) {
      rnz = ai[i+1] - ai[i];
      ajj = aj + ai[i];
      for (j=0; j<rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(0);
}
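
/* Note on the indexing above: within each bs x bs block, entry (m,k) lives at offset m + k*bs,
   i.e. blocks are stored column by column. The test (ajj[j]>i || k>=m) emits whole blocks from
   the strictly upper-triangular block part, but only the upper triangle (k>=m) of diagonal
   blocks, so each symmetric entry is passed to MUMPS exactly once. */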

PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
  PetscInt64        nz,rnz,i,j;
  const PetscScalar *av,*v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row,*col;
  Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
  PetscBool         missing;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A,&av));
  ai    = aa->i; aj = aa->j;
  adiag = aa->diag;
  PetscCall(MatMissingDiagonal_SeqAIJ(A,&missing,NULL));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i+1] - adiag[i];
        }
      }
    } else {
      for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j]+shift,&col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i+shift,&row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift,&col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (missing) {
      for (i=0; i<M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i+1])) {
          for (j=ai[i];j<ai[i+1];j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i+1] - adiag[i];
          v1  = av + adiag[i];
          for (j=0; j<rnz; j++) {
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i=0; i<M; i++) {
        rnz = ai[i+1] - adiag[i];
        v1  = av + adiag[i];
        for (j=0; j<rnz; j++) {
          val[nz++] = v1[j];
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A,&av));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
  PetscInt          bs;
  PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av,*bv,*v1,*v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A,&bs));
  rstart = A->rmap->rstart;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  av = aa->a;
  bv = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i]*bs2;
    v2     = bv + bi[i]*bs2;

    if (bs>1) {
      /* A-part */
      for (j=0; j<countA; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (rstart + ajj[j]*bs>irow || k>=m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]));
              }
              val[jj++] = v1[j*bs2 + m + k*bs];
            }
          }
        }
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        for (k=0; k<bs; k++) {
          for (m=0; m<bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift,&row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]));
            }
            val[jj++] = v2[j*bs2 + m + k*bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j=0; j<countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j=0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow+=bs;
  }
  mumps->nnz = jj;
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscInt          bs;
  PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A,&bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift,&row[jj]));
            PetscCall(PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}

PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatGetOption(A,MAT_HERMITIAN,&hermitian));
  PetscCheck(!hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad,&av));
  PetscCall(MatSeqAIJGetArrayRead(Ao,&bv));

  aa    = (Mat_SeqAIJ*)(Ad)->data;
  bb    = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag;
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0;    /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of the upper triangular part of mat */
    PetscCall(PetscMalloc2(nz,&row,nz,&col));
    PetscCall(PetscMalloc1(nz,&val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift,&row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad,&av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao,&bv));
  PetscFunctionReturn(0);
}

PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree2(mumps->irn,mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  if (mumps->mumps_comm != MPI_COMM_NULL) PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL));
  PetscFunctionReturn(0);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to the start of the RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt  ompsize=mumps->omp_comm_size;
  PetscInt           i,m,M,rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A,&M,NULL));
  PetscCall(MatGetLocalSize(A,&m,NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      PetscCall(PetscMalloc1(m,&mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A,&rstart,NULL));
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
  #if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt  *ranges;
    PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group       petsc_group,omp_group;
    PetscScalar     *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks));
        PetscCall(PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm,&petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm,&omp_group));
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A,&ranges));
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc));
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks,petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when the current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        PetscCheck(mumps->rhs_disps[j] >= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. A single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m*nrhs,&sendcount));
    PetscCallMPI(MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            PetscCall(PetscArraycpy(dst,src,mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
  #endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}
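
/* Layout note (illustrative): after this routine, rhs_loc holds the nrhs right-hand sides as
   contiguous columns of length nloc_rhs (lrhs_loc = nloc_rhs), and irhs_loc[i] is the 1-based
   global row owned locally at position i, which is the layout MUMPS's distributed dense RHS
   (ICNTL(20)=10) expects. */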

PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar  *rarray = NULL;
  PetscScalar        *array;
  IS                 is_iden,is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2));

  if (A->factorerrortype) {
    PetscCall(PetscInfo(A,"MatSolve is called with a singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
    PetscCall(VecSetInf(x));
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(VecGetArrayRead(b,&rarray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A,1,rarray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq,&array));
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    PetscCall(VecCopy(b,x));
    PetscCall(VecGetArray(x,&array));
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle the condensation step of the Schur complement (if any)
     We set ICNTL(26) == -1 by default when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, such an out-of-range value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S.
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    PetscCheck(mumps->petsc_size <= 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* handle the expansion step of the Schur complement (if any) */
  if (second_solve) {
    PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
  }

  if (mumps->petsc_size > 1) { /* convert the mumps distributed solution to the petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create the scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      PetscCall(ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc,&isol2_loc));
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc));  /* to */
      PetscCall(VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save the current value of id.ICNTL(9) */
    }

    PetscCall(VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD));
  }

  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      PetscCall(VecRestoreArrayRead(b,&rarray));
    } else if (!mumps->myid) {
      PetscCall(VecRestoreArray(mumps->b_seq,&array));
    }
  } else PetscCall(VecRestoreArray(x,&array));

  PetscCall(PetscLogFlops(2.0*mumps->id.RINFO(3)));
  PetscFunctionReturn(0);
}

PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A,b,x));
  mumps->id.ICNTL(9) = 1;
  PetscFunctionReturn(0);
}
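
/* Per the MUMPS manual, ICNTL(9)=1 (the default) solves A x = b while other values solve
   A^T x = b, so the transpose solve above just toggles ICNTL(9) around a regular MatSolve. */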
1222 
1223 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1224 {
1225   Mat               Bt = NULL;
1226   PetscBool         denseX,denseB,flg,flgT;
1227   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1228   PetscInt          i,nrhs,M;
1229   PetscScalar       *array;
1230   const PetscScalar *rbray;
1231   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1232   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1233   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1234   IS                is_to,is_from;
1235   PetscInt          k,proc,j,m,myrstart;
1236   const PetscInt    *rstart;
1237   Vec               v_mpi,msol_loc;
1238   VecScatter        scat_sol;
1239   Vec               b_seq;
1240   VecScatter        scat_rhs;
1241   PetscScalar       *aa;
1242   PetscInt          spnr,*ia,*ja;
1243   Mat_MPIAIJ        *b = NULL;
1244 
1245   PetscFunctionBegin;
1246   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL));
1247   PetscCheck(denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1248 
1249   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL));
1250   if (denseB) {
1251     PetscCheck(B->rmap->n == X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1252     mumps->id.ICNTL(20)= 0; /* dense RHS */
1253   } else { /* sparse B */
1254     PetscCheck(X != B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1255     PetscCall(PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT));
1256     if (flgT) { /* input B is transpose of actural RHS matrix,
1257                  because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1258       PetscCall(MatTransposeGetMat(B,&Bt));
1259     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1260     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1261   }
1262 
1263   PetscCall(MatGetSize(B,&M,&nrhs));
1264   mumps->id.nrhs = nrhs;
1265   mumps->id.lrhs = M;
1266   mumps->id.rhs  = NULL;
1267 
1268   if (mumps->petsc_size == 1) {
1269     PetscScalar *aa;
1270     PetscInt    spnr,*ia,*ja;
1271     PetscBool   second_solve = PETSC_FALSE;
1272 
1273     PetscCall(MatDenseGetArray(X,&array));
1274     mumps->id.rhs = (MumpsScalar*)array;
1275 
1276     if (denseB) {
1277       /* copy B to X */
1278       PetscCall(MatDenseGetArrayRead(B,&rbray));
1279       PetscCall(PetscArraycpy(array,rbray,M*nrhs));
1280       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1281     } else { /* sparse B */
1282       PetscCall(MatSeqAIJGetArray(Bt,&aa));
1283       PetscCall(MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1284       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1285       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1286       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1287     }
1288     /* handle condensation step of Schur complement (if any) */
1289     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1290       second_solve = PETSC_TRUE;
1291       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_FALSE));
1292     }
1293     /* solve phase */
1294     /*-------------*/
1295     mumps->id.job = JOB_SOLVE;
1296     PetscMUMPS_c(mumps);
1297     PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1298 
1299     /* handle expansion step of Schur complement (if any) */
1300     if (second_solve) {
1301       PetscCall(MatMumpsHandleSchur_Private(A,PETSC_TRUE));
1302     }
1303     if (!denseB) { /* sparse B */
1304       PetscCall(MatSeqAIJRestoreArray(Bt,&aa));
1305       PetscCall(MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1306       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1307     }
1308     PetscCall(MatDenseRestoreArray(X,&array));
1309     PetscFunctionReturn(0);
1310   }
1311 
1312   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1313   PetscCheckFalse(mumps->petsc_size > 1 && mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1314 
1315   /* create msol_loc to hold mumps local solution */
1316   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1317   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1318 
1319   lsol_loc  = mumps->id.lsol_loc;
1320   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1321   PetscCall(PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc));
1322   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1323   mumps->id.isol_loc = isol_loc;
1324 
1325   PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc));
1326 
1327   if (denseB) {
1328     if (mumps->ICNTL20 == 10) {
1329       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1330       PetscCall(MatDenseGetArrayRead(B,&rbray));
1331       PetscCall(MatMumpsSetUpDistRHSInfo(A,nrhs,rbray));
1332       PetscCall(MatDenseRestoreArrayRead(B,&rbray));
1333       PetscCall(MatGetLocalSize(B,&m,NULL));
1334       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi));
1335     } else {
1336       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1337       /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
1338         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
1339         0, re-arrange B into desired order, which is a local operation.
1340       */
1341 
1342       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1343       /* wrap dense rhs matrix B into a vector v_mpi */
1344       PetscCall(MatGetLocalSize(B,&m,NULL));
1345       PetscCall(MatDenseGetArray(B,&bray));
1346       PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1347       PetscCall(MatDenseRestoreArray(B,&bray));
1348 
1349       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1350       if (!mumps->myid) {
1351         PetscInt *idx;
1352         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1353         PetscCall(PetscMalloc1(nrhs*M,&idx));
1354         PetscCall(MatGetOwnershipRanges(B,&rstart));
1355         k = 0;
1356         for (proc=0; proc<mumps->petsc_size; proc++) {
1357           for (j=0; j<nrhs; j++) {
1358             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1359           }
1360         }
1361 
1362         PetscCall(VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq));
1363         PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to));
1364         PetscCall(ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from));
1365       } else {
1366         PetscCall(VecCreateSeq(PETSC_COMM_SELF,0,&b_seq));
1367         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to));
1368         PetscCall(ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from));
1369       }
1370       PetscCall(VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs));
1371       PetscCall(VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1372       PetscCall(ISDestroy(&is_to));
1373       PetscCall(ISDestroy(&is_from));
1374       PetscCall(VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD));
1375 
1376       if (!mumps->myid) { /* define rhs on the host */
1377         PetscCall(VecGetArray(b_seq,&bray));
1378         mumps->id.rhs = (MumpsScalar*)bray;
1379         PetscCall(VecRestoreArray(b_seq,&bray));
1380       }
1381     }
1382   } else { /* sparse B */
1383     b = (Mat_MPIAIJ*)Bt->data;
1384 
1385     /* wrap dense X into a vector v_mpi */
1386     PetscCall(MatGetLocalSize(X,&m,NULL));
1387     PetscCall(MatDenseGetArray(X,&bray));
1388     PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi));
1389     PetscCall(MatDenseRestoreArray(X,&bray));
1390 
1391     if (!mumps->myid) {
1392       PetscCall(MatSeqAIJGetArray(b->A,&aa));
1393       PetscCall(MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1394       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1395       PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
1396       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1397     } else {
1398       mumps->id.irhs_ptr    = NULL;
1399       mumps->id.irhs_sparse = NULL;
1400       mumps->id.nz_rhs      = 0;
1401       mumps->id.rhs_sparse  = NULL;
1402     }
1403   }
1404 
1405   /* solve phase */
1406   /*-------------*/
1407   mumps->id.job = JOB_SOLVE;
1408   PetscMUMPS_c(mumps);
1409   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1410 
1411   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1412   PetscCall(MatDenseGetArray(X,&array));
1413   PetscCall(VecPlaceArray(v_mpi,array));
1414 
1415   /* create scatter scat_sol */
1416   PetscCall(MatGetOwnershipRanges(X,&rstart));
1417   /* idxx[]: indices for scattering the mumps solution msol_loc into the petsc vector v_mpi (i.e. into X) */
1418 
1419   PetscCall(ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from));
1420   PetscCall(PetscMalloc1(nlsol_loc,&idxx));
1421   for (i=0; i<lsol_loc; i++) {
1422     isol_loc[i] -= 1; /* change Fortran style to C style. sol_loc[i+j*lsol_loc] contains x[isol_loc[i]] of the j-th vector */
1423 
1424     for (proc=0; proc<mumps->petsc_size; proc++) {
1425       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1426         myrstart = rstart[proc];
1427         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1428         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1429         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1430         break;
1431       }
1432     }
1433 
1434     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1435   }
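  /* On the rank owning rows [myrstart,myrstart+m) of X, v_mpi stores entry (r,j) at myrstart*nrhs + j*m + (r-myrstart),
     i.e. each rank holds its nrhs column pieces contiguously; idxx[] encodes exactly this map for every isol_loc[i] */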
1436   PetscCall(ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to));
1437   PetscCall(VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol));
1438   PetscCall(VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1439   PetscCall(ISDestroy(&is_from));
1440   PetscCall(ISDestroy(&is_to));
1441   PetscCall(VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD));
1442   PetscCall(MatDenseRestoreArray(X,&array));
1443 
1444   /* free spaces */
1445   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1446   mumps->id.isol_loc = isol_loc_save;
1447 
1448   PetscCall(PetscFree2(sol_loc,isol_loc));
1449   PetscCall(PetscFree(idxx));
1450   PetscCall(VecDestroy(&msol_loc));
1451   PetscCall(VecDestroy(&v_mpi));
1452   if (!denseB) {
1453     if (!mumps->myid) {
1454       b = (Mat_MPIAIJ*)Bt->data;
1455       PetscCall(MatSeqAIJRestoreArray(b->A,&aa));
1456       PetscCall(MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
1457       PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1458     }
1459   } else {
1460     if (mumps->ICNTL20 == 0) {
1461       PetscCall(VecDestroy(&b_seq));
1462       PetscCall(VecScatterDestroy(&scat_rhs));
1463     }
1464   }
1465   PetscCall(VecScatterDestroy(&scat_sol));
1466   PetscCall(PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3)));
1467   PetscFunctionReturn(0);
1468 }
1469 
1470 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1471 {
1472   PetscBool      flg;
1473   Mat            B;
1474 
1475   PetscFunctionBegin;
1476   PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
1477   PetscCheck(flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");
1478 
1479   /* Create B=Bt^T that uses Bt's data structure */
1480   PetscCall(MatCreateTranspose(Bt,&B));
1481 
1482   PetscCall(MatMatSolve_MUMPS(A,B,X));
1483   PetscCall(MatDestroy(&B));
1484   PetscFunctionReturn(0);
1485 }
1486 
1487 #if !defined(PETSC_USE_COMPLEX)
1488 /*
1489   input:
1490    F:        numeric factor
1491   output:
1492    nneg:     total number of negative pivots
1493    nzero:    total number of zero pivots
1494    npos:     (global dimension of F) - nneg - nzero
1495 */
1496 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1497 {
1498   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1499   PetscMPIInt    size;
1500 
1501   PetscFunctionBegin;
1502   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F),&size));
1503   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1504   PetscCheckFalse(size > 1 && mumps->id.ICNTL(13) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia",mumps->id.ICNTL(13));
1505 
1506   if (nneg) *nneg = mumps->id.INFOG(12);
1507   if (nzero || npos) {
1508     PetscCheck(mumps->id.ICNTL(24) == 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1509     if (nzero) *nzero = mumps->id.INFOG(28);
1510     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1511   }
1512   PetscFunctionReturn(0);
1513 }
1514 #endif
1515 
1516 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1517 {
1518   PetscInt       i,nreqs;
1519   PetscMUMPSInt  *irn,*jcn;
1520   PetscMPIInt    count;
1521   PetscInt64     totnnz,remain;
1522   const PetscInt osize=mumps->omp_comm_size;
1523   PetscScalar    *val;
1524 
1525   PetscFunctionBegin;
1526   if (osize > 1) {
1527     if (reuse == MAT_INITIAL_MATRIX) {
1528       /* master first gathers counts of nonzeros to receive */
1529       if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize,&mumps->recvcount));
1530       PetscCallMPI(MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm));
1531 
1532       /* Then each computes number of send/recvs */
1533       if (mumps->is_omp_master) {
1534         /* Start from 1 since self communication is not done via MPI */
1535         nreqs = 0;
1536         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1537       } else {
1538         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1539       }
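      /* (x+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX is ceil(x/PETSC_MPI_INT_MAX): a message carries at most
         PETSC_MPI_INT_MAX entries, so this is the number of sliced messages needed to move x entries */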
1540       PetscCall(PetscMalloc1(nreqs*3,&mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */
1541 
1542       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1543          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1544          might be a prime number > 2^31-1, we have to slice the messages. Note that since omp_comm_size
1545          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1546        */
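      /* Illustrative arithmetic: a rank holding nnz = 5e9 entries sends each of irn/jcn/val in three slices of
         2147483647, 2147483647 and 705032706 entries; the master posts matching MPI_Irecv's below */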
1547       nreqs = 0; /* counter for actual send/recvs */
1548       if (mumps->is_omp_master) {
1549         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1550         PetscCall(PetscMalloc2(totnnz,&irn,totnnz,&jcn));
1551         PetscCall(PetscMalloc1(totnnz,&val));
1552 
1553         /* Self communication */
1554         PetscCall(PetscArraycpy(irn,mumps->irn,mumps->nnz));
1555         PetscCall(PetscArraycpy(jcn,mumps->jcn,mumps->nnz));
1556         PetscCall(PetscArraycpy(val,mumps->val,mumps->nnz));
1557 
1558         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1559         PetscCall(PetscFree2(mumps->irn,mumps->jcn));
1560         PetscCall(PetscFree(mumps->val_alloc));
1561         mumps->nnz = totnnz;
1562         mumps->irn = irn;
1563         mumps->jcn = jcn;
1564         mumps->val = mumps->val_alloc = val;
1565 
1566         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1567         jcn += mumps->recvcount[0];
1568         val += mumps->recvcount[0];
1569 
1570         /* Remote communication */
1571         for (i=1; i<osize; i++) {
1572           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1573           remain = mumps->recvcount[i] - count;
1574           while (count>0) {
1575             PetscCallMPI(MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1576             PetscCallMPI(MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1577             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1578             irn    += count;
1579             jcn    += count;
1580             val    += count;
1581             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1582             remain -= count;
1583           }
1584         }
1585       } else {
1586         irn    = mumps->irn;
1587         jcn    = mumps->jcn;
1588         val    = mumps->val;
1589         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1590         remain = mumps->nnz - count;
1591         while (count>0) {
1592           PetscCallMPI(MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1593           PetscCallMPI(MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1594           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1595           irn    += count;
1596           jcn    += count;
1597           val    += count;
1598           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1599           remain -= count;
1600         }
1601       }
1602     } else {
1603       nreqs = 0;
1604       if (mumps->is_omp_master) {
1605         val = mumps->val + mumps->recvcount[0];
1606         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1607           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1608           remain = mumps->recvcount[i] - count;
1609           while (count>0) {
1610             PetscCallMPI(MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1611             val    += count;
1612             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1613             remain -= count;
1614           }
1615         }
1616       } else {
1617         val    = mumps->val;
1618         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1619         remain = mumps->nnz - count;
1620         while (count>0) {
1621           PetscCallMPI(MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]));
1622           val    += count;
1623           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1624           remain -= count;
1625         }
1626       }
1627     }
1628     PetscCallMPI(MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE));
1629     mumps->tag++; /* It is totally fine for the above sends/recvs to share one MPI tag */
1630   }
1631   PetscFunctionReturn(0);
1632 }
1633 
1634 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1635 {
1636   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1637   PetscBool      isMPIAIJ;
1638 
1639   PetscFunctionBegin;
1640   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1) = -16 with INFO(2) = 0 indicates an empty matrix, not an error */
1641     if (mumps->id.INFOG(1) == -6) {
1642       PetscCall(PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1643     }
1644     PetscCall(PetscInfo(A,"MatFactorNumeric is called after the analysis phase failed, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1645     PetscFunctionReturn(0);
1646   }
1647 
1648   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
1649   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps));
1650 
1651   /* numerical factorization phase */
1652   /*-------------------------------*/
1653   mumps->id.job = JOB_FACTNUMERIC;
1654   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1655     if (!mumps->myid) {
1656       mumps->id.a = (MumpsScalar*)mumps->val;
1657     }
1658   } else {
1659     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1660   }
1661   PetscMUMPS_c(mumps);
1662   if (mumps->id.INFOG(1) < 0) {
1663     if (A->erroriffailure) {
1664       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1665     } else {
1666       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1667         PetscCall(PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1668         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1669       } else if (mumps->id.INFOG(1) == -13) {
1670         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1671         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1672       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1673         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1674         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1675       } else {
1676         PetscCall(PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1677         F->factorerrortype = MAT_FACTOR_OTHER;
1678       }
1679     }
1680   }
1681   PetscCheckFalse(!mumps->myid && mumps->id.ICNTL(16) > 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"mumps->id.ICNTL(16)=%d",mumps->id.ICNTL(16));
1682 
1683   F->assembled    = PETSC_TRUE;
1684 
1685   if (F->schur) { /* reset Schur status to unfactored */
1686 #if defined(PETSC_HAVE_CUDA)
1687     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1688 #endif
1689     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1690       mumps->id.ICNTL(19) = 2;
1691       PetscCall(MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur));
1692     }
1693     PetscCall(MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED));
1694   }
1695 
1696   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1697   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1698 
1699   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1700   if (mumps->petsc_size > 1) {
1701     PetscInt    lsol_loc;
1702     PetscScalar *sol_loc;
1703 
1704     PetscCall(PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ));
1705 
1706     /* distributed solution; Create x_seq=sol_loc for repeated use */
1707     if (mumps->x_seq) {
1708       PetscCall(VecScatterDestroy(&mumps->scat_sol));
1709       PetscCall(PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc));
1710       PetscCall(VecDestroy(&mumps->x_seq));
1711     }
1712     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
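    /* INFO(23) is the number of pivots eliminated on this rank, hence the number of solution entries MUMPS
       returns locally per right-hand side */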
1713     PetscCall(PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc));
1714     mumps->id.lsol_loc = lsol_loc;
1715     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1716     PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq));
1717   }
1718   PetscCall(PetscLogFlops(mumps->id.RINFO(2)));
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 /* Sets MUMPS options from the options database */
1723 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1724 {
1725   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1726   PetscMUMPSInt  icntl=0;
1727   PetscInt       info[80],i,ninfo=80,rbs,cbs;
1728   PetscBool      flg=PETSC_FALSE;
1729 
1730   PetscFunctionBegin;
1731   PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");
1732   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg));
1733   if (flg) mumps->id.ICNTL(1) = icntl;
1734   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg));
1735   if (flg) mumps->id.ICNTL(2) = icntl;
1736   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg));
1737   if (flg) mumps->id.ICNTL(3) = icntl;
1738 
1739   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg));
1740   if (flg) mumps->id.ICNTL(4) = icntl;
1741   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore MUMPS default id.ICNTL(3) = 6 */
1742 
1743   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg));
1744   if (flg) mumps->id.ICNTL(6) = icntl;
1745 
1746   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg));
1747   if (flg) {
1748     PetscCheckFalse(icntl == 1 || icntl < 0 || icntl > 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1749     mumps->id.ICNTL(7) = icntl;
1750   }
1751 
1752   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL));
1753   /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
1754   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL));
1755   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL));
1756   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL));
1757   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL));
1758   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL));
1759   PetscCall(MatGetBlockSizes(A,&rbs,&cbs));
1760   if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = -rbs;
1761   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15","ICNTL(15): compression of the input matrix resulting from a block format","None",mumps->id.ICNTL(15),&mumps->id.ICNTL(15),&flg));
1762   if (flg) {
1763     PetscCheck(mumps->id.ICNTL(15) <= 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"Positive -mat_mumps_icntl_15 not handled");
1764     PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0),PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes");
1765   }
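  /* Example (assuming the MUMPS convention that a negative ICNTL(15) prescribes a constant block size): a matrix
     with square block size 3 gets ICNTL(15) = -3 above, letting MUMPS group variables in blocks of 3 during analysis */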
1766   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL));
1767   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data, if any */
1768     PetscCall(MatDestroy(&F->schur));
1769     PetscCall(MatMumpsResetSchur_Private(mumps));
1770   }
1771 
1772   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1773      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1774      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1775      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1776      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1777      In short, we could not use distributed RHS with MPICH until v4.0b1.
1778    */
1779 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1780   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1781 #else
1782   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1783 #endif
1784   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg));
1785   PetscCheckFalse(flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1786 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1787   PetscCheckFalse(flg && mumps->ICNTL20 == 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1788 #endif
1789   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */
1790 
1791   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL));
1792   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL));
1793   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL));
1794   if (mumps->id.ICNTL(24)) {
1795     mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
1796   }
1797 
1798   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL));
1799   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase when a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL));
1800   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL));
1801   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL));
1802   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL));
1803   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
1804   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL));
1805   /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL));  -- not supported by PETSc API */
1806   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL));
1807   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL));
1808   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL));
1809   PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL));
1810 
1811   PetscCall(PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL));
1812   PetscCall(PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL));
1813   PetscCall(PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL));
1814   PetscCall(PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL));
1815   PetscCall(PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL));
1816   PetscCall(PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL));
1817 
1818   PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));
1819 
1820   PetscCall(PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL));
1821   if (ninfo) {
1822     PetscCheck(ninfo <= 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must be <= 80",ninfo);
1823     PetscCall(PetscMalloc1(ninfo,&mumps->info));
1824     mumps->ninfo = ninfo;
1825     for (i=0; i<ninfo; i++) {
1826       PetscCheckFalse(info[i] < 1 || info[i] > 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must be between 1 and 80",info[i]);
1827       mumps->info[i] = info[i];
1828     }
1829   }
1830 
1831   PetscOptionsEnd();
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1836 {
1837   PetscInt       nthreads=0;
1838 
1839   PetscFunctionBegin;
1840   mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1841   PetscCallMPI(MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size));
1842   PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm,&mumps->myid));/* "if (!myid)" still works even if mumps_comm is different */
1843 
1844   PetscCall(PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support));
1845   if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1846   PetscCall(PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL));
1847   if (mumps->use_petsc_omp_support) {
1848 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1849     PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl));
1850     PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master));
1851 #else
1852     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
1853 #endif
1854   } else {
1855     mumps->omp_comm      = PETSC_COMM_SELF;
1856     mumps->mumps_comm    = mumps->petsc_comm;
1857     mumps->is_omp_master = PETSC_TRUE;
1858   }
1859   PetscCallMPI(MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size));
1860   mumps->reqs = NULL;
1861   mumps->tag  = 0;
1862 
1863   /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1864   if (mumps->mumps_comm != MPI_COMM_NULL) {
1865     PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm));
1866   }
1867 
1868   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
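  /* MUMPS is a Fortran library, so it is given the Fortran handle of the dup'ed communicator */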
1869   mumps->id.job = JOB_INIT;
1870   mumps->id.par = 1;  /* host participates in factorization and solve */
1871   mumps->id.sym = mumps->sym;
1872 
1873   PetscMUMPS_c(mumps);
1874   PetscCheck(mumps->id.INFOG(1) >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1875 
1876   /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1877      For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1878    */
1879   PetscCallMPI(MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm));
1880   PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm));
1881 
1882   mumps->scat_rhs = NULL;
1883   mumps->scat_sol = NULL;
1884 
1885   /* set PETSc-MUMPS default options - override MUMPS default */
1886   mumps->id.ICNTL(3) = 0;
1887   mumps->id.ICNTL(4) = 0;
1888   if (mumps->petsc_size == 1) {
1889     mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1890     mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1891   } else {
1892     mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1893     mumps->id.ICNTL(21) = 1;   /* distributed solution */
1894   }
1895 
1896   /* schur */
1897   mumps->id.size_schur    = 0;
1898   mumps->id.listvar_schur = NULL;
1899   mumps->id.schur         = NULL;
1900   mumps->sizeredrhs       = 0;
1901   mumps->schur_sol        = NULL;
1902   mumps->schur_sizesol    = 0;
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1907 {
1908   PetscFunctionBegin;
1909   if (mumps->id.INFOG(1) < 0) {
1910     if (A->erroriffailure) {
1911       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1912     } else {
1913       if (mumps->id.INFOG(1) == -6) {
1914         PetscCall(PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1915         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1916       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1917         PetscCall(PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1918         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1919       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1) = -16 with INFO(2) = 0 means n = 0, i.e. an empty matrix */
1920         PetscCall(PetscInfo(F,"Empty matrix\n"));
1921       } else {
1922         PetscCall(PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2)));
1923         F->factorerrortype = MAT_FACTOR_OTHER;
1924       }
1925     }
1926   }
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1931 {
1932   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1933   Vec            b;
1934   const PetscInt M = A->rmap->N;
1935 
1936   PetscFunctionBegin;
1937   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1938     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1939     PetscFunctionReturn(0);
1940   }
1941 
1942   /* Set MUMPS options from the options database */
1943   PetscCall(PetscSetMUMPSFromOptions(F,A));
1944 
1945   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
1946   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
1947 
1948   /* analysis phase */
1949   /*----------------*/
1950   mumps->id.job = JOB_FACTSYMBOLIC;
1951   mumps->id.n   = M;
1952   switch (mumps->id.ICNTL(18)) {
1953   case 0:  /* centralized assembled matrix input */
1954     if (!mumps->myid) {
1955       mumps->id.nnz = mumps->nnz;
1956       mumps->id.irn = mumps->irn;
1957       mumps->id.jcn = mumps->jcn;
1958       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1959       if (r) {
1960         mumps->id.ICNTL(7) = 1;
1961         if (!mumps->myid) {
1962           const PetscInt *idx;
1963           PetscInt       i;
1964 
1965           PetscCall(PetscMalloc1(M,&mumps->id.perm_in));
1966           PetscCall(ISGetIndices(r,&idx));
1967           for (i=0; i<M; i++) PetscCall(PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]))); /* perm_in[]: start from 1, not 0! */
1968           PetscCall(ISRestoreIndices(r,&idx));
1969         }
1970       }
1971     }
1972     break;
1973   case 3:  /* distributed assembled matrix input (size>1) */
1974     mumps->id.nnz_loc = mumps->nnz;
1975     mumps->id.irn_loc = mumps->irn;
1976     mumps->id.jcn_loc = mumps->jcn;
1977     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1978     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1979       PetscCall(MatCreateVecs(A,NULL,&b));
1980       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
1981       PetscCall(VecDestroy(&b));
1982     }
1983     break;
1984   }
1985   PetscMUMPS_c(mumps);
1986   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
1987 
1988   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1989   F->ops->solve           = MatSolve_MUMPS;
1990   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
1991   F->ops->matsolve        = MatMatSolve_MUMPS;
1992   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1993 
1994   mumps->matstruc = SAME_NONZERO_PATTERN;
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 /* Note the Petsc r and c permutations are ignored */
1999 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2000 {
2001   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2002   Vec            b;
2003   const PetscInt M = A->rmap->N;
2004 
2005   PetscFunctionBegin;
2006   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2007     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2008     PetscFunctionReturn(0);
2009   }
2010 
2011   /* Set MUMPS options from the options database */
2012   PetscCall(PetscSetMUMPSFromOptions(F,A));
2013 
2014   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2015   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2016 
2017   /* analysis phase */
2018   /*----------------*/
2019   mumps->id.job = JOB_FACTSYMBOLIC;
2020   mumps->id.n   = M;
2021   switch (mumps->id.ICNTL(18)) {
2022   case 0:  /* centralized assembled matrix input */
2023     if (!mumps->myid) {
2024       mumps->id.nnz = mumps->nnz;
2025       mumps->id.irn = mumps->irn;
2026       mumps->id.jcn = mumps->jcn;
2027       if (mumps->id.ICNTL(6)>1) {
2028         mumps->id.a = (MumpsScalar*)mumps->val;
2029       }
2030     }
2031     break;
2032   case 3:  /* distributed assembled matrix input (size>1) */
2033     mumps->id.nnz_loc = mumps->nnz;
2034     mumps->id.irn_loc = mumps->irn;
2035     mumps->id.jcn_loc = mumps->jcn;
2036     if (mumps->id.ICNTL(6)>1) {
2037       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2038     }
2039     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2040       PetscCall(MatCreateVecs(A,NULL,&b));
2041       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2042       PetscCall(VecDestroy(&b));
2043     }
2044     break;
2045   }
2046   PetscMUMPS_c(mumps);
2047   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2048 
2049   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2050   F->ops->solve           = MatSolve_MUMPS;
2051   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2052 
2053   mumps->matstruc = SAME_NONZERO_PATTERN;
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 /* Note the Petsc r permutation and factor info are ignored */
2058 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2059 {
2060   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2061   Vec            b;
2062   const PetscInt M = A->rmap->N;
2063 
2064   PetscFunctionBegin;
2065   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2066     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2067     PetscFunctionReturn(0);
2068   }
2069 
2070   /* Set MUMPS options from the options database */
2071   PetscCall(PetscSetMUMPSFromOptions(F,A));
2072 
2073   PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps));
2074   PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps));
2075 
2076   /* analysis phase */
2077   /*----------------*/
2078   mumps->id.job = JOB_FACTSYMBOLIC;
2079   mumps->id.n   = M;
2080   switch (mumps->id.ICNTL(18)) {
2081   case 0:  /* centralized assembled matrix input */
2082     if (!mumps->myid) {
2083       mumps->id.nnz = mumps->nnz;
2084       mumps->id.irn = mumps->irn;
2085       mumps->id.jcn = mumps->jcn;
2086       if (mumps->id.ICNTL(6)>1) {
2087         mumps->id.a = (MumpsScalar*)mumps->val;
2088       }
2089     }
2090     break;
2091   case 3:  /* distributed assembled matrix input (size>1) */
2092     mumps->id.nnz_loc = mumps->nnz;
2093     mumps->id.irn_loc = mumps->irn;
2094     mumps->id.jcn_loc = mumps->jcn;
2095     if (mumps->id.ICNTL(6)>1) {
2096       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2097     }
2098     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2099       PetscCall(MatCreateVecs(A,NULL,&b));
2100       PetscCall(VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq));
2101       PetscCall(VecDestroy(&b));
2102     }
2103     break;
2104   }
2105   PetscMUMPS_c(mumps);
2106   PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps));
2107 
2108   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2109   F->ops->solve                 = MatSolve_MUMPS;
2110   F->ops->solvetranspose        = MatSolve_MUMPS;
2111   F->ops->matsolve              = MatMatSolve_MUMPS;
2112   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2113 #if defined(PETSC_USE_COMPLEX)
2114   F->ops->getinertia = NULL;
2115 #else
2116   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2117 #endif
2118 
2119   mumps->matstruc = SAME_NONZERO_PATTERN;
2120   PetscFunctionReturn(0);
2121 }
2122 
2123 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2124 {
2125   PetscBool         iascii;
2126   PetscViewerFormat format;
2127   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2128 
2129   PetscFunctionBegin;
2130   /* check if matrix is mumps type */
2131   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2132 
2133   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
2134   if (iascii) {
2135     PetscCall(PetscViewerGetFormat(viewer,&format));
2136     if (format == PETSC_VIEWER_ASCII_INFO) {
2137       PetscCall(PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n"));
2138       PetscCall(PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym));
2139       PetscCall(PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par));
2140       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1)));
2141       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2)));
2142       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3)));
2143       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4)));
2144       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5)));
2145       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6)));
2146       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7)));
2147       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8)));
2148       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10)));
2149       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11)));
2150       if (mumps->id.ICNTL(11)>0) {
2151         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4)));
2152         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5)));
2153         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6)));
2154         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8)));
2155         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9)));
2156         PetscCall(PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11)));
2157       }
2158       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12)));
2159       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13)));
2160       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14)));
2161       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (compression of the input matrix):            %d\n",mumps->id.ICNTL(15)));
2162       /* ICNTL(16-17) not used */
2163       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18)));
2164       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19)));
2165       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20)));
2166       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21)));
2167       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22)));
2168       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory that can be allocated locally):%d\n",mumps->id.ICNTL(23)));
2169 
2170       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24)));
2171       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25)));
2172       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26)));
2173       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27)));
2174       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28)));
2175       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29)));
2176 
2177       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30)));
2178       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors are discarded in the solve phase):   %d\n",mumps->id.ICNTL(31)));
2179       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33)));
2180       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35)));
2181       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36)));
2182       PetscCall(PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38)));
2183 
2184       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1)));
2185       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2)));
2186       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3)));
2187       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4)));
2188       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5)));
2189       PetscCall(PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7)));
2190 
2191       /* information local to each processor */
2192       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n"));
2193       PetscCall(PetscViewerASCIIPushSynchronized(viewer));
2194       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1)));
2195       PetscCall(PetscViewerFlush(viewer));
2196       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n"));
2197       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2)));
2198       PetscCall(PetscViewerFlush(viewer));
2199       PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n"));
2200       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3)));
2201       PetscCall(PetscViewerFlush(viewer));
2202 
2203       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n"));
2204       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(15)));
2205       PetscCall(PetscViewerFlush(viewer));
2206 
2207       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n"));
2208       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16)));
2209       PetscCall(PetscViewerFlush(viewer));
2210 
2211       PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n"));
2212       PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23)));
2213       PetscCall(PetscViewerFlush(viewer));
2214 
2215       if (mumps->ninfo && mumps->ninfo <= 80) {
2216         PetscInt i;
2217         for (i=0; i<mumps->ninfo; i++) {
2218           PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]));
2219           PetscCall(PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i])));
2220           PetscCall(PetscViewerFlush(viewer));
2221         }
2222       }
2223       PetscCall(PetscViewerASCIIPopSynchronized(viewer));
2224 
2225       if (!mumps->myid) { /* information from the host */
2226         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1)));
2227         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2)));
2228         PetscCall(PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3)));
2229         PetscCall(PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34)));
2230 
2231         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3)));
2232         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4)));
2233         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5)));
2234         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6)));
2235         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7)));
2236         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8)));
2237         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9)));
2238         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n",mumps->id.INFOG(10)));
2239         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11)));
2240         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12)));
2241         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13)));
2242         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d\n",mumps->id.INFOG(14)));
2243         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15)));
2244         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16)));
2245         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17)));
2246         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18)));
2247         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19)));
2248         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20)));
2249         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21)));
2250         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22)));
2251         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23)));
2252         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24)));
2253         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25)));
2254         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28)));
2255         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29)));
2256         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31)));
2257         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32)));
2258         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33)));
2259         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34)));
2260         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35)));
2261         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36)));
2262         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37)));
2263         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38)));
2264         PetscCall(PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39)));
2265       }
2266     }
2267   }
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2272 {
2273   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2274 
2275   PetscFunctionBegin;
2276   info->block_size        = 1.0;
2277   info->nz_allocated      = mumps->id.INFOG(20);
2278   info->nz_used           = mumps->id.INFOG(20);
2279   info->nz_unneeded       = 0.0;
2280   info->assemblies        = 0.0;
2281   info->mallocs           = 0.0;
2282   info->memory            = 0.0;
2283   info->fill_ratio_given  = 0;
2284   info->fill_ratio_needed = 0;
2285   info->factor_mallocs    = 0;
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 /* -------------------------------------------------------------------------------------------*/
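/* Usage sketch via the public interface (these _MUMPS routines are the dispatch targets): call
   MatFactorSetSchurIS(F,is) after MatGetFactor() and before the symbolic factorization to select the
   Schur variables; after the numeric factorization, retrieve the Schur complement with
   MatFactorCreateSchurComplement() */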
2290 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2291 {
2292   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2293   const PetscScalar *arr;
2294   const PetscInt    *idxs;
2295   PetscInt          size,i;
2296 
2297   PetscFunctionBegin;
2298   PetscCall(ISGetLocalSize(is,&size));
2299   if (mumps->petsc_size > 1) {
2300     PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2301 
2302     ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2303     PetscCallMPI(MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm));
2304     PetscCheck(gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
2305   }
2306 
2307   /* Schur complement matrix */
2308   PetscCall(MatDestroy(&F->schur));
2309   PetscCall(MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur));
2310   PetscCall(MatDenseGetArrayRead(F->schur,&arr));
2311   mumps->id.schur      = (MumpsScalar*)arr;
2312   mumps->id.size_schur = size;
2313   mumps->id.schur_lld  = size;
2314   PetscCall(MatDenseRestoreArrayRead(F->schur,&arr));
2315   if (mumps->sym == 1) {
2316     PetscCall(MatSetOption(F->schur,MAT_SPD,PETSC_TRUE));
2317   }
2318 
2319   /* MUMPS expects Fortran style indices */
2320   PetscCall(PetscFree(mumps->id.listvar_schur));
2321   PetscCall(PetscMalloc1(size,&mumps->id.listvar_schur));
2322   PetscCall(ISGetIndices(is,&idxs));
2323   for (i=0; i<size; i++) PetscCall(PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i])));
2324   PetscCall(ISRestoreIndices(is,&idxs));
2325   if (mumps->petsc_size > 1) {
2326     mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2327   } else {
2328     if (F->factortype == MAT_FACTOR_LU) {
2329       mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2330     } else {
2331       mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2332     }
2333   }
2334   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2335   mumps->id.ICNTL(26) = -1;
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 /* -------------------------------------------------------------------------------------------*/
2340 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2341 {
2342   Mat            St;
2343   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2344   PetscScalar    *array;
2345 #if defined(PETSC_USE_COMPLEX)
2346   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2347 #endif
2348 
2349   PetscFunctionBegin;
2350   PetscCheck(mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2351   PetscCall(MatCreate(PETSC_COMM_SELF,&St));
2352   PetscCall(MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur));
2353   PetscCall(MatSetType(St,MATDENSE));
2354   PetscCall(MatSetUp(St));
2355   PetscCall(MatDenseGetArray(St,&array));
2356   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2357     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2358       PetscInt i,j,N=mumps->id.size_schur;
2359       for (i=0;i<N;i++) {
2360         for (j=0;j<N;j++) {
2361 #if !defined(PETSC_USE_COMPLEX)
2362           PetscScalar val = mumps->id.schur[i*N+j];
2363 #else
2364           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2365 #endif
2366           array[j*N+i] = val;
2367         }
2368       }
2369     } else { /* stored by columns */
2370       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2371     }
2372   } else { /* either full or lower-triangular (not packed) */
2373     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2374       PetscInt i,j,N=mumps->id.size_schur;
2375       for (i=0;i<N;i++) {
2376         for (j=i;j<N;j++) {
2377 #if !defined(PETSC_USE_COMPLEX)
2378           PetscScalar val = mumps->id.schur[i*N+j];
2379 #else
2380           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2381 #endif
2382           array[i*N+j] = val;
2383           array[j*N+i] = val;
2384         }
2385       }
2386     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2387       PetscCall(PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur));
2388     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2389       PetscInt i,j,N=mumps->id.size_schur;
2390       for (i=0;i<N;i++) {
2391         for (j=0;j<i+1;j++) {
2392 #if !defined(PETSC_USE_COMPLEX)
2393           PetscScalar val = mumps->id.schur[i*N+j];
2394 #else
2395           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2396 #endif
2397           array[i*N+j] = val;
2398           array[j*N+i] = val;
2399         }
2400       }
2401     }
2402   }
2403   PetscCall(MatDenseRestoreArray(St,&array));
2404   *S   = St;
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 /* -------------------------------------------------------------------------------------------*/
2409 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2410 {
2411   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2412 
2413   PetscFunctionBegin;
2414   PetscCall(PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl)));
2415   PetscFunctionReturn(0);
2416 }
2417 
2418 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2419 {
2420   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2421 
2422   PetscFunctionBegin;
2423   *ival = mumps->id.ICNTL(icntl);
2424   PetscFunctionReturn(0);
2425 }
2426 
2427 /*@
2428   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2429 
2430    Logically Collective on Mat
2431 
2432    Input Parameters:
2433 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2434 .  icntl - index of MUMPS parameter array ICNTL()
2435 -  ival - value of MUMPS ICNTL(icntl)
2436 
2437   Options Database:
2438 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2439 
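   Example:
   A minimal sketch, assuming F was obtained with MatGetFactor() using the MATSOLVERMUMPS solver; it selects the Metis ordering (ICNTL(7)=5, see MATSOLVERMUMPS):
$      MatGetFactor(A,MATSOLVERMUMPS,MAT_FACTOR_LU,&F);
$      MatMumpsSetIcntl(F,7,5);   /* sequential analysis with the Metis ordering */
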
2440    Level: beginner
2441 
2442    References:
2443 .  * - MUMPS Users' Guide
2444 
2445 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2446 @*/
2447 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2448 {
2449   PetscFunctionBegin;
2450   PetscValidType(F,1);
2451   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2452   PetscValidLogicalCollectiveInt(F,icntl,2);
2453   PetscValidLogicalCollectiveInt(F,ival,3);
2454   PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 /*@
2459   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2460 
2461    Logically Collective on Mat
2462 
2463    Input Parameters:
2464 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2465 -  icntl - index of MUMPS parameter array ICNTL()
2466 
2467   Output Parameter:
2468 .  ival - value of MUMPS ICNTL(icntl)
2469 
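   Example:
   A minimal sketch, assuming F is a factored matrix from the PETSc-MUMPS interface; it reads back the ordering option:
$      PetscInt ival;
$      MatMumpsGetIcntl(F,7,&ival);   /* ival now holds ICNTL(7) */
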
2470    Level: beginner
2471 
2472    References:
2473 .  * - MUMPS Users' Guide
2474 
2475 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2476 @*/
2477 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2478 {
2479   PetscFunctionBegin;
2480   PetscValidType(F,1);
2481   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2482   PetscValidLogicalCollectiveInt(F,icntl,2);
2483   PetscValidIntPointer(ival,3);
2484   PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2485   PetscFunctionReturn(0);
2486 }
2487 
2488 /* -------------------------------------------------------------------------------------------*/
2489 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2490 {
2491   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2492 
2493   PetscFunctionBegin;
2494   mumps->id.CNTL(icntl) = val;
2495   PetscFunctionReturn(0);
2496 }
2497 
2498 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2499 {
2500   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2501 
2502   PetscFunctionBegin;
2503   *val = mumps->id.CNTL(icntl);
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 /*@
2508   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2509 
2510    Logically Collective on Mat
2511 
2512    Input Parameters:
2513 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2514 .  icntl - index of MUMPS parameter array CNTL()
2515 -  val - value of MUMPS CNTL(icntl)
2516 
2517   Options Database:
2518 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2519 
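   Example:
   A minimal sketch, assuming F is a factored matrix from the PETSc-MUMPS interface; it raises the relative pivoting threshold CNTL(1) to force more numerical pivoting:
$      MatMumpsSetCntl(F,1,0.1);   /* CNTL(1): relative pivoting threshold */
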
2520    Level: beginner
2521 
2522    References:
2523 .  * - MUMPS Users' Guide
2524 
2525 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2526 @*/
2527 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2528 {
2529   PetscFunctionBegin;
2530   PetscValidType(F,1);
2531   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2532   PetscValidLogicalCollectiveInt(F,icntl,2);
2533   PetscValidLogicalCollectiveReal(F,val,3);
2534   PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 /*@
2539   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2540 
2541    Logically Collective on Mat
2542 
2543    Input Parameters:
2544 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2545 -  icntl - index of MUMPS parameter array CNTL()
2546 
2547   Output Parameter:
2548 .  val - value of MUMPS CNTL(icntl)
2549 
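   Example:
   A minimal sketch, assuming F is a factored matrix from the PETSc-MUMPS interface:
$      PetscReal val;
$      MatMumpsGetCntl(F,1,&val);   /* val now holds CNTL(1), the relative pivoting threshold */
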
2550    Level: beginner
2551 
2552    References:
2553 .  * - MUMPS Users' Guide
2554 
2555 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2556 @*/
2557 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2558 {
2559   PetscFunctionBegin;
2560   PetscValidType(F,1);
2561   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2562   PetscValidLogicalCollectiveInt(F,icntl,2);
2563   PetscValidRealPointer(val,3);
2564   PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2569 {
2570   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2571 
2572   PetscFunctionBegin;
2573   *info = mumps->id.INFO(icntl);
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2578 {
2579   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2580 
2581   PetscFunctionBegin;
2582   *infog = mumps->id.INFOG(icntl);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2587 {
2588   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2589 
2590   PetscFunctionBegin;
2591   *rinfo = mumps->id.RINFO(icntl);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2596 {
2597   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2598 
2599   PetscFunctionBegin;
2600   *rinfog = mumps->id.RINFOG(icntl);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2605 {
2606   Mat            Bt = NULL,Btseq = NULL;
2607   PetscBool      flg;
2608   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2609   PetscScalar    *aa;
2610   PetscInt       spnr,*ia,*ja,M,nrhs;
2611 
2612   PetscFunctionBegin;
2613   PetscValidPointer(spRHS,2);
2614   PetscCall(PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg));
2615   if (flg) {
2616     PetscCall(MatTransposeGetMat(spRHS,&Bt));
2617   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2618 
2619   PetscCall(MatMumpsSetIcntl(F,30,1));
2620 
2621   if (mumps->petsc_size > 1) {
2622     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2623     Btseq = b->A;
2624   } else {
2625     Btseq = Bt;
2626   }
2627 
2628   PetscCall(MatGetSize(spRHS,&M,&nrhs));
2629   mumps->id.nrhs = nrhs;
2630   mumps->id.lrhs = M;
2631   mumps->id.rhs  = NULL;
2632 
2633   if (!mumps->myid) {
2634     PetscCall(MatSeqAIJGetArray(Btseq,&aa));
2635     PetscCall(MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2636     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2637     PetscCall(PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs));
2638     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2639   } else {
2640     mumps->id.irhs_ptr    = NULL;
2641     mumps->id.irhs_sparse = NULL;
2642     mumps->id.nz_rhs      = 0;
2643     mumps->id.rhs_sparse  = NULL;
2644   }
2645   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2646   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2647 
2648   /* solve phase */
2649   /*-------------*/
2650   mumps->id.job = JOB_SOLVE;
2651   PetscMUMPS_c(mumps);
2652   if (mumps->id.INFOG(1) < 0)
2653     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2654 
2655   if (!mumps->myid) {
2656     PetscCall(MatSeqAIJRestoreArray(Btseq,&aa));
2657     PetscCall(MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg));
2658     PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2659   }
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 /*@
2664   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2665 
2666    Logically Collective on Mat
2667 
2668    Input Parameters:
2669 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2670 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding the specified indices on process 0
2671 
2672   Output Parameter:
2673 . spRHS - requested entries of inverse of A
2674 
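  Example:
  A minimal sketch, where Bt names a user-provided sequential MATAIJ matrix (on process 0) whose nonzero pattern marks the requested entries of inv(A), and F is the MUMPS factor of A:
$      Mat spRHS;
$      MatCreateTranspose(Bt,&spRHS);   /* wrap Bt as a MATTRANSPOSEMAT */
$      MatMumpsGetInverse(F,spRHS);     /* the requested entries of inv(A) overwrite the values of Bt */
$      MatDestroy(&spRHS);
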
2675    Level: beginner
2676 
2677    References:
2678 .  * - MUMPS Users' Guide
2679 
2680 .seealso: MatGetFactor(), MatCreateTranspose()
2681 @*/
2682 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2683 {
2684   PetscFunctionBegin;
2685   PetscValidType(F,1);
2686   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2687   PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));
2688   PetscFunctionReturn(0);
2689 }
2690 
2691 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2692 {
2693   Mat            spRHS;
2694 
2695   PetscFunctionBegin;
2696   PetscCall(MatCreateTranspose(spRHST,&spRHS));
2697   PetscCall(MatMumpsGetInverse_MUMPS(F,spRHS));
2698   PetscCall(MatDestroy(&spRHS));
2699   PetscFunctionReturn(0);
2700 }
2701 
2702 /*@
2703   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2704 
2705    Logically Collective on Mat
2706 
2707    Input Parameters:
2708 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2709 -  spRHST - sequential sparse matrix in MATAIJ format holding the specified indices of A^T on process 0
2710 
2711   Output Parameter:
2712 . spRHST - requested entries of inverse of A^T
2713 
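  Example:
  A minimal sketch, assuming F is the MUMPS factor of A and spRHST is a MATAIJ matrix whose nonzero pattern marks the requested entries of inv(A^T):
$      MatMumpsGetInverseTranspose(F,spRHST);   /* the requested entries of inv(A^T) overwrite the values of spRHST */
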
2714    Level: beginner
2715 
2716    References:
2717 .  * - MUMPS Users' Guide
2718 
2719 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2720 @*/
2721 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2722 {
2723   PetscBool      flg;
2724 
2725   PetscFunctionBegin;
2726   PetscValidType(F,1);
2727   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2728   PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL));
2729   PetscCheck(flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2730 
2731   PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));
2732   PetscFunctionReturn(0);
2733 }
2734 
2735 /*@
2736   MatMumpsGetInfo - Get MUMPS parameter INFO()
2737 
2738    Logically Collective on Mat
2739 
2740    Input Parameters:
2741 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2742 -  icntl - index of MUMPS parameter array INFO()
2743 
2744   Output Parameter:
2745 .  ival - value of MUMPS INFO(icntl)
2746 
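  Example:
  A minimal sketch, assuming F is a factored matrix from the PETSc-MUMPS interface; it checks the local MUMPS status:
$      PetscInt info1;
$      MatMumpsGetInfo(F,1,&info1);   /* INFO(1) < 0 indicates an error */
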
2747    Level: beginner
2748 
2749    References:
2750 .  * - MUMPS Users' Guide
2751 
2752 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2753 @*/
2754 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2755 {
2756   PetscFunctionBegin;
2757   PetscValidType(F,1);
2758   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2759   PetscValidIntPointer(ival,3);
2760   PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2761   PetscFunctionReturn(0);
2762 }
2763 
2764 /*@
2765   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2766 
2767    Logically Collective on Mat
2768 
2769    Input Parameters:
2770 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2771 -  icntl - index of MUMPS parameter array INFOG()
2772 
2773   Output Parameter:
2774 .  ival - value of MUMPS INFOG(icntl)
2775 
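  Example:
  A minimal sketch, assuming F is a factored matrix from the PETSc-MUMPS interface; it reads a global statistic:
$      PetscInt nz;
$      MatMumpsGetInfog(F,20,&nz);   /* INFOG(20): estimated number of entries in the factors */
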
2776    Level: beginner
2777 
2778    References:
2779 .  * - MUMPS Users' Guide
2780 
2781 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2782 @*/
2783 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2784 {
2785   PetscFunctionBegin;
2786   PetscValidType(F,1);
2787   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2788   PetscValidIntPointer(ival,3);
2789   PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));
2790   PetscFunctionReturn(0);
2791 }
2792 
2793 /*@
2794   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2795 
2796    Logically Collective on Mat
2797 
2798    Input Parameters:
2799 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2800 -  icntl - index of MUMPS parameter array RINFO()
2801 
2802   Output Parameter:
2803 .  val - value of MUMPS RINFO(icntl)
2804 
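  Example:
  A minimal sketch, assuming F is a factored matrix from the PETSc-MUMPS interface; see the MUMPS Users' Guide for the meaning of each RINFO() entry:
$      PetscReal rinfo1;
$      MatMumpsGetRinfo(F,1,&rinfo1);   /* e.g., RINFO(1): local estimated flops for the elimination */
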
2805    Level: beginner
2806 
2807    References:
2808 .  * - MUMPS Users' Guide
2809 
2810 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2811 @*/
2812 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2813 {
2814   PetscFunctionBegin;
2815   PetscValidType(F,1);
2816   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2817   PetscValidRealPointer(val,3);
2818   PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2819   PetscFunctionReturn(0);
2820 }
2821 
2822 /*@
2823   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2824 
2825    Logically Collective on Mat
2826 
2827    Input Parameters:
2828 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2829 -  icntl - index of MUMPS parameter array RINFOG()
2830 
2831   Output Parameter:
2832 .  val - value of MUMPS RINFOG(icntl)
2833 
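  Example:
  A sketch of retrieving the determinant, assuming F was factored with -mat_mumps_icntl_33 1 so that MUMPS computed it; per the MUMPS Users' Guide the determinant is RINFOG(12)*2^INFOG(34):
$      PetscReal mantissa;
$      PetscInt  exponent;
$      MatMumpsGetRinfog(F,12,&mantissa);   /* (real part of the) determinant mantissa */
$      MatMumpsGetInfog(F,34,&exponent);    /* exponent of the determinant, see INFOG(34) */
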
2834    Level: beginner
2835 
2836    References:
2837 .  * - MUMPS Users' Guide
2838 
2839 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2840 @*/
2841 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2842 {
2843   PetscFunctionBegin;
2844   PetscValidType(F,1);
2845   PetscCheck(F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2846   PetscValidRealPointer(val,3);
2847   PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 /*MC
2852   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2853   distributed and sequential matrices via the external package MUMPS.
2854 
2855   Works with MATAIJ and MATSBAIJ matrices; LU factorization is also available for MATBAIJ and MATSEQSELL matrices
2856 
2857   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2858 
2859   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2860 
2861   Use -pc_type cholesky or -pc_type lu together with -pc_factor_mat_solver_type mumps to use this direct solver
2862 
2863   Options Database Keys:
2864 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2865 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning messages
2866 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2867 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2868 .  -mat_mumps_icntl_6 - ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)
2869 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2870                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2871 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2872 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of iterative refinement steps
2873 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2874 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2875 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2876 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2877 .  -mat_mumps_icntl_15  - ICNTL(15): compression of the input matrix resulting from a block format
2878 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2879 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2880 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2881 .  -mat_mumps_icntl_23  - ICNTL(23): maximum size of the working memory (in MB) that MUMPS can allocate per processor
2882 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2883 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2884 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase when a Schur complement matrix has been computed
2885 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2886 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2887 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2888 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2889 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2890 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2891 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2892 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2893 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2894 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2895 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2896 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2897 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2898 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2899 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) were called before calling MUMPS.
2900                                    The default is the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual.
2901 
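  Example:
  A typical command line, as a sketch (./ex stands for any PETSc program that solves a linear system with KSP):
$     mpiexec -n 4 ./ex -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_4 2
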
2902   Level: beginner
2903 
2904     Notes:
2905     MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it on a Hermitian matrix generates an error.
2906 
2907     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2908 $          KSPGetPC(ksp,&pc);
2909 $          PCFactorGetMatrix(pc,&mat);
2910 $          MatMumpsGetInfo(mat,....);
2911 $          MatMumpsGetInfog(mat,....); etc.
2912            Alternatively, run with -ksp_error_if_not_converged and the program will stop and print the information in the error message.
2913 
2914   Using MUMPS with 64-bit integers
2915     MUMPS provides 64-bit integer support in two build modes:
2916       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2917       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2918 
2919       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2920       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of nonzeros, but must have fewer than 2^31 rows and
2921       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2922       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2923 
2924     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2925 
2926   Two modes to run MUMPS/PETSc with OpenMP
2927 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2928 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2929 
2930 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2931 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2932 
2933    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2934    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2935    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2936    libraries, to get good performance you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2937    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2938 
2939    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2940    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2941    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2942    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set
2943    by CPU binding policies in job scripts) and make those CPUs available to the master so that OpenMP threads spawned by MUMPS can run on them.
2944    In a multi-socket compute node, MPI rank mapping matters. Continuing the above example, suppose your compute node has two sockets and
2945    you interleave MPI ranks across the two sockets, i.e., even ranks are placed on socket 0 and odd ranks on socket 1, with MPI ranks bound to cores.
2946    Then with -mat_mumps_use_omp_threads 16, a master rank (and the threads it spawns) will use half of its cores on socket 0 and half on socket 1,
2947    which definitely hurts locality. If instead you map MPI ranks consecutively onto the two sockets, the problem does not arise. Therefore, when
2948    you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2949    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2950    examine the mapping result.
2951 
2952    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2953    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2954    calls omp_set_num_threads(m) internally before calling MUMPS.
2955 
2956    References:
2957 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2958 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2959 
2960 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()
2961 
2962 M*/
2963 
2964 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
2965 {
2966   PetscFunctionBegin;
2967   *type = MATSOLVERMUMPS;
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 /* MatGetFactor for Seq and MPI AIJ matrices */
2972 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
2973 {
2974   Mat            B;
2975   Mat_MUMPS      *mumps;
2976   PetscBool      isSeqAIJ;
2977   PetscMPIInt    size;
2978 
2979   PetscFunctionBegin;
2980  #if defined(PETSC_USE_COMPLEX)
2981   PetscCheckFalse(A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
2982  #endif
2983   /* Create the factorization matrix */
2984   PetscCall(PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ));
2985   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
2986   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
2987   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
2988   PetscCall(MatSetUp(B));
2989 
2990   PetscCall(PetscNewLog(B,&mumps));
2991 
2992   B->ops->view    = MatView_MUMPS;
2993   B->ops->getinfo = MatGetInfo_MUMPS;
2994 
2995   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
2996   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
2997   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
2998   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
2999   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3000   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3001   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3002   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3003   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3004   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3005   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3006   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3007   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3008 
3009   if (ftype == MAT_FACTOR_LU) {
3010     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3011     B->factortype            = MAT_FACTOR_LU;
3012     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3013     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3014     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3015     mumps->sym = 0;
3016   } else {
3017     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3018     B->factortype                  = MAT_FACTOR_CHOLESKY;
3019     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3020     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3021     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3022 #if defined(PETSC_USE_COMPLEX)
3023     mumps->sym = 2;
3024 #else
3025     if (A->spd_set && A->spd) mumps->sym = 1;
3026     else                      mumps->sym = 2;
3027 #endif
3028   }
3029 
3030   /* set solvertype */
3031   PetscCall(PetscFree(B->solvertype));
3032   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3033   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3034   if (size == 1) {
3035     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3036     B->canuseordering = PETSC_TRUE;
3037   }
3038   B->ops->destroy = MatDestroy_MUMPS;
3039   B->data         = (void*)mumps;
3040 
3041   PetscCall(PetscInitializeMUMPS(A,mumps));
3042 
3043   *F = B;
3044   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3045   PetscFunctionReturn(0);
3046 }
3047 
3048 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3049 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3050 {
3051   Mat            B;
3052   Mat_MUMPS      *mumps;
3053   PetscBool      isSeqSBAIJ;
3054   PetscMPIInt    size;
3055 
3056   PetscFunctionBegin;
3057  #if defined(PETSC_USE_COMPLEX)
3058   PetscCheck(!A->hermitian || A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3059  #endif
3060   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3061   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3062   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3063   PetscCall(MatSetUp(B));
3064 
3065   PetscCall(PetscNewLog(B,&mumps));
3066   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ));
3067   if (isSeqSBAIJ) {
3068     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3069   } else {
3070     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3071   }
3072 
3073   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3074   B->ops->view                   = MatView_MUMPS;
3075   B->ops->getinfo                = MatGetInfo_MUMPS;
3076 
3077   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3078   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3079   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3080   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3081   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3082   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3083   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3084   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3085   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3086   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3087   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3088   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3089   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3090 
3091   B->factortype = MAT_FACTOR_CHOLESKY;
3092 #if defined(PETSC_USE_COMPLEX)
3093   mumps->sym = 2;
3094 #else
3095   if (A->spd_set && A->spd) mumps->sym = 1;
3096   else                      mumps->sym = 2;
3097 #endif
3098 
3099   /* set solvertype */
3100   PetscCall(PetscFree(B->solvertype));
3101   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3102   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3103   if (size == 1) {
3104     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3105     B->canuseordering = PETSC_TRUE;
3106   }
3107   PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]));
3108   B->ops->destroy = MatDestroy_MUMPS;
3109   B->data         = (void*)mumps;
3110 
3111   PetscCall(PetscInitializeMUMPS(A,mumps));
3112 
3113   *F = B;
3114   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3115   PetscFunctionReturn(0);
3116 }
3117 
3118 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3119 {
3120   Mat            B;
3121   Mat_MUMPS      *mumps;
3122   PetscBool      isSeqBAIJ;
3123   PetscMPIInt    size;
3124 
3125   PetscFunctionBegin;
3126   /* Create the factorization matrix */
3127   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ));
3128   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3129   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3130   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3131   PetscCall(MatSetUp(B));
3132 
3133   PetscCall(PetscNewLog(B,&mumps));
3134   if (ftype == MAT_FACTOR_LU) {
3135     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3136     B->factortype            = MAT_FACTOR_LU;
3137     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3138     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3139     mumps->sym = 0;
3140     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3141   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3142 
3143   B->ops->view        = MatView_MUMPS;
3144   B->ops->getinfo     = MatGetInfo_MUMPS;
3145 
3146   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3147   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3148   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3149   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3150   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3151   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3152   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3153   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3154   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3155   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3156   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3157   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS));
3158   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS));
3159 
3160   /* set solvertype */
3161   PetscCall(PetscFree(B->solvertype));
3162   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3163   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3164   if (size == 1) {
3165     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3166     B->canuseordering = PETSC_TRUE;
3167   }
3168   B->ops->destroy = MatDestroy_MUMPS;
3169   B->data         = (void*)mumps;
3170 
3171   PetscCall(PetscInitializeMUMPS(A,mumps));
3172 
3173   *F = B;
3174   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3175   PetscFunctionReturn(0);
3176 }
3177 
3178 /* MatGetFactor for Seq and MPI SELL matrices */
3179 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3180 {
3181   Mat            B;
3182   Mat_MUMPS      *mumps;
3183   PetscBool      isSeqSELL;
3184   PetscMPIInt    size;
3185 
3186   PetscFunctionBegin;
3187   /* Create the factorization matrix */
3188   PetscCall(PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL));
3189   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
3190   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
3191   PetscCall(PetscStrallocpy("mumps",&((PetscObject)B)->type_name));
3192   PetscCall(MatSetUp(B));
3193 
3194   PetscCall(PetscNewLog(B,&mumps));
3195 
3196   B->ops->view        = MatView_MUMPS;
3197   B->ops->getinfo     = MatGetInfo_MUMPS;
3198 
3199   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps));
3200   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS));
3201   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS));
3202   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS));
3203   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS));
3204   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS));
3205   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS));
3206   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS));
3207   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS));
3208   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS));
3209   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS));
3210 
3211   if (ftype == MAT_FACTOR_LU) {
3212     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3213     B->factortype            = MAT_FACTOR_LU;
3214     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3215     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3216     mumps->sym = 0;
3217     PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]));
3218   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3219 
3220   /* set solvertype */
3221   PetscCall(PetscFree(B->solvertype));
3222   PetscCall(PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype));
3223   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
3224   if (size == 1) {
3225     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3226     B->canuseordering = PETSC_TRUE;
3227   }
3228   B->ops->destroy = MatDestroy_MUMPS;
3229   B->data         = (void*)mumps;
3230 
3231   PetscCall(PetscInitializeMUMPS(A,mumps));
3232 
3233   *F = B;
3234   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3235   PetscFunctionReturn(0);
3236 }
3237 
3238 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3239 {
3240   PetscFunctionBegin;
3241   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3242   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3243   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3244   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3245   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3246   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps));
3247   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps));
3248   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps));
3249   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps));
3250   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps));
3251   PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps));
3252   PetscFunctionReturn(0);
3253 }
3254