xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision f155c23239e6d3f7c7ec79ff00b4f28519d0ce99)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is defined in the MUMPS headers when MUMPS is built in full 64-bit mode, so this macro is the more reliable check */
54     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command-line macro used to build MUMPS in full 64-bit mode */
58     #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #endif
61 
62 #define MPIU_MUMPSINT             MPI_INT
63 #define PETSC_MUMPS_INT_MAX       2147483647
64 #define PETSC_MUMPS_INT_MIN       -2147483648
65 
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is a row/col index or some other small integer */
67 static inline PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69   PetscFunctionBegin;
70   if (PetscDefined(USE_64BIT_INDICES)) {
71     PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
72   }
73   *b = (PetscMUMPSInt)(a);
74   PetscFunctionReturn(0);
75 }
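/* Illustrative usage sketch (not part of the build): the cast is typically applied to row/column
   indices right before they are handed to MUMPS, for example

     PetscMUMPSInt mrow;                                      (mrow is just a placeholder name)
     ierr = PetscMUMPSIntCast(i+shift,&mrow);CHKERRQ(ierr);

   With 64-bit PetscInt, an out-of-range value errors with PETSC_ERR_ARG_OUTOFRANGE instead of
   silently truncating. */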
76 
77 /* Put these utility routines here since they are only used in this file */
78 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
79 {
80   PetscErrorCode ierr;
81   PetscInt       myval;
82   PetscBool      myset;
83   PetscFunctionBegin;
84   /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
85   ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
86   if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
87   if (set) *set = myset;
88   PetscFunctionReturn(0);
89 }
90 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
91 
92 /* If using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the set of CPUs the master ranks can run on */
93 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
94 #define PetscMUMPS_c(mumps) \
95   do { \
96     if (mumps->use_petsc_omp_support) { \
97       if (mumps->is_omp_master) { \
98         ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
99         MUMPS_c(&mumps->id); \
100         ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
101       } \
102       ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
103       /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is      \
104          process-specific, so we only Bcast info[1] (an error code) and leave the others alone (they   \
105          do not have an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \
106          omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
107       */ \
108       ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
109       ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRMPI(ierr);\
110       ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
111     } else { \
112       MUMPS_c(&mumps->id); \
113     } \
114   } while (0)
115 #else
116 #define PetscMUMPS_c(mumps) \
117   do { MUMPS_c(&mumps->id); } while (0)
118 #endif
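/* Illustrative call-site sketch: every MUMPS phase in this file follows the same pattern of
   setting id.job, invoking the wrapper, then checking INFOG(1), e.g.

     mumps->id.job = JOB_SOLVE;
     PetscMUMPS_c(mumps);
     PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
*/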
119 
120 /* declare MumpsScalar */
121 #if defined(PETSC_USE_COMPLEX)
122 #if defined(PETSC_USE_REAL_SINGLE)
123 #define MumpsScalar mumps_complex
124 #else
125 #define MumpsScalar mumps_double_complex
126 #endif
127 #else
128 #define MumpsScalar PetscScalar
129 #endif
130 
131 /* macros so that indices match the MUMPS documentation */
132 #define ICNTL(I) icntl[(I)-1]
133 #define CNTL(I) cntl[(I)-1]
134 #define INFOG(I) infog[(I)-1]
135 #define INFO(I) info[(I)-1]
136 #define RINFOG(I) rinfog[(I)-1]
137 #define RINFO(I) rinfo[(I)-1]
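/* Example: mumps->id.ICNTL(24) expands to mumps->id.icntl[23], so the 1-based control numbers used
   throughout this file match the numbering in the MUMPS user guide. */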
138 
139 typedef struct Mat_MUMPS Mat_MUMPS;
140 struct Mat_MUMPS {
141 #if defined(PETSC_USE_COMPLEX)
142 #if defined(PETSC_USE_REAL_SINGLE)
143   CMUMPS_STRUC_C id;
144 #else
145   ZMUMPS_STRUC_C id;
146 #endif
147 #else
148 #if defined(PETSC_USE_REAL_SINGLE)
149   SMUMPS_STRUC_C id;
150 #else
151   DMUMPS_STRUC_C id;
152 #endif
153 #endif
154 
155   MatStructure   matstruc;
156   PetscMPIInt    myid,petsc_size;
157   PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
158   PetscScalar    *val,*val_alloc;       /* For some matrices we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc. */
159   PetscInt64     nnz;                   /* number of nonzeros. The type is called "selective 64-bit" in MUMPS */
160   PetscMUMPSInt  sym;
161   MPI_Comm       mumps_comm;
162   PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
163   VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
164   PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
165   PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
166 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
167   PetscInt       *rhs_nrow,max_nrhs;
168   PetscMPIInt    *rhs_recvcounts,*rhs_disps;
169   PetscScalar    *rhs_loc,*rhs_recvbuf;
170 #endif
171   Vec            b_seq,x_seq;
172   PetscInt       ninfo,*info;           /* which INFO to display */
173   PetscInt       sizeredrhs;
174   PetscScalar    *schur_sol;
175   PetscInt       schur_sizesol;
176   PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
177   PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
178   PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);
179 
180   /* stuff used by petsc/mumps OpenMP support */
181   PetscBool      use_petsc_omp_support;
182   PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller that guarantees blocked processes release their CPUs (MPI_Barrier does not provide this guarantee) */
183   MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is the PETSc matrix's comm */
184   PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
185   PetscMPIInt    tag,omp_comm_size;
186   PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
187   MPI_Request    *reqs;
188 };
189 
190 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
191    Here, nrow is the number of rows, ia[] is the row pointer and ja[] holds the column indices.
192  */
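/* Illustrative example: a dense 2x2 right-hand side stored as 1-based CSR has
     ia[] = {1,3,5}   (row i occupies entries ia[i]..ia[i+1]-1, 1-based)
     ja[] = {1,2,1,2}
   so nnz = ia[nrow]-1 = 4, which is what the routine below computes and casts. */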
193 static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
194 {
195   PetscErrorCode ierr;
196   PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */
197 
198   PetscFunctionBegin;
199 #if defined(PETSC_USE_64BIT_INDICES)
200   {
201     PetscInt i;
202     if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
203       ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
204       ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
205       mumps->cur_ilen = nrow+1;
206     }
207     if (nnz > mumps->cur_jlen) {
208       ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
209       ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
210       mumps->cur_jlen = nnz;
211     }
212     for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
213     for (i=0; i<nnz; i++)    {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
214     *ia_mumps = mumps->ia_alloc;
215     *ja_mumps = mumps->ja_alloc;
216   }
217 #else
218   *ia_mumps = ia;
219   *ja_mumps = ja;
220 #endif
221   ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
222   PetscFunctionReturn(0);
223 }
224 
225 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
226 {
227   PetscErrorCode ierr;
228 
229   PetscFunctionBegin;
230   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
231   ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
232   ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
233   mumps->id.size_schur = 0;
234   mumps->id.schur_lld  = 0;
235   mumps->id.ICNTL(19)  = 0;
236   PetscFunctionReturn(0);
237 }
238 
239 /* solve with rhs in mumps->id.redrhs and return in the same location */
240 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
241 {
242   Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
243   Mat                  S,B,X;
244   MatFactorSchurStatus schurstatus;
245   PetscInt             sizesol;
246   PetscErrorCode       ierr;
247 
248   PetscFunctionBegin;
249   ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
250   ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
251   ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
252   ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
253 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
254   ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
255 #endif
256   switch (schurstatus) {
257   case MAT_FACTOR_SCHUR_FACTORED:
258     ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
259     ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
260 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
261     ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
262 #endif
263     if (!mumps->id.ICNTL(9)) { /* transpose solve */
264       ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
265     } else {
266       ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
267     }
268     break;
269   case MAT_FACTOR_SCHUR_INVERTED:
270     sizesol = mumps->id.nrhs*mumps->id.size_schur;
271     if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
272       ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
273       ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
274       mumps->schur_sizesol = sizesol;
275     }
276     ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
277     ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
278 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
279     ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
280 #endif
281     ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
282     if (!mumps->id.ICNTL(9)) { /* transpose solve */
283       ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
284     } else {
285       ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
286     }
287     ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
288     ierr = MatProductSymbolic(X);CHKERRQ(ierr);
289     ierr = MatProductNumeric(X);CHKERRQ(ierr);
290 
291     ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
292     break;
293   default:
294     SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
295   }
296   ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
297   ierr = MatDestroy(&B);CHKERRQ(ierr);
298   ierr = MatDestroy(&X);CHKERRQ(ierr);
299   PetscFunctionReturn(0);
300 }
301 
302 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
303 {
304   Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
305   PetscErrorCode ierr;
306 
307   PetscFunctionBegin;
308   if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
309     PetscFunctionReturn(0);
310   }
311   if (!expansion) { /* prepare for the condensation step */
312     PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
313     /* allocate MUMPS internal array to store reduced right-hand sides */
314     if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
315       ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
316       mumps->id.lredrhs = mumps->id.size_schur;
317       ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
318       mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
319     }
320     mumps->id.ICNTL(26) = 1; /* condensation phase */
321   } else { /* prepare for the expansion step */
322     /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
323     ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
324     mumps->id.ICNTL(26) = 2; /* expansion phase */
325     PetscMUMPS_c(mumps);
326     PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
327     /* restore defaults */
328     mumps->id.ICNTL(26) = -1;
329     /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
330     if (mumps->id.nrhs > 1) {
331       ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
332       mumps->id.lredrhs = 0;
333       mumps->sizeredrhs = 0;
334     }
335   }
336   PetscFunctionReturn(0);
337 }
338 
339 /*
340   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
341   MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]
342   input:
343     A       - matrix in aij,baij or sbaij format
344     shift   - 0: C style output triple; 1: Fortran style output triple.
345     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
346               MAT_REUSE_MATRIX:   only the values in v array are updated
347   output:
348     nnz     - dim of r, c, and v (number of local nonzero entries of A)
349     r, c, v - row and col index, matrix values (matrix triples)
350 
351 The returned arrays r, c, and sometimes v are obtained in a single PetscMalloc() and are later freed in
352 MatDestroy_MUMPS() with PetscFree(mumps->irn). This is not ideal: because v is only SOMETIMES part of the
353 mumps->irn allocation, the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
354 
355  */
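/*
   Illustrative example: with shift = 1 (Fortran style), the 2x2 AIJ matrix
       [a  b]
       [0  c]
   is converted to the triples
       r = {1,1,2},  c = {1,2,2},  v = {a,b,c},  nnz = 3
 */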
356 
357 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
358 {
359   const PetscScalar *av;
360   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
361   PetscInt64        nz,rnz,i,j,k;
362   PetscErrorCode    ierr;
363   PetscMUMPSInt     *row,*col;
364   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
365 
366   PetscFunctionBegin;
367   ierr       = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
368   mumps->val = (PetscScalar*)av;
369   if (reuse == MAT_INITIAL_MATRIX) {
370     nz   = aa->nz;
371     ai   = aa->i;
372     aj   = aa->j;
373     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
374     for (i=k=0; i<M; i++) {
375       rnz = ai[i+1] - ai[i];
376       ajj = aj + ai[i];
377       for (j=0; j<rnz; j++) {
378         ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
379         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
380         k++;
381       }
382     }
383     mumps->irn = row;
384     mumps->jcn = col;
385     mumps->nnz = nz;
386   }
387   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
391 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
392 {
393   PetscErrorCode ierr;
394   PetscInt64     nz,i,j,k,r;
395   Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
396   PetscMUMPSInt  *row,*col;
397 
398   PetscFunctionBegin;
399   mumps->val = a->val;
400   if (reuse == MAT_INITIAL_MATRIX) {
401     nz   = a->sliidx[a->totalslices];
402     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
403     for (i=k=0; i<a->totalslices; i++) {
404       for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
405         ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
406       }
407     }
408     for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
409     mumps->irn = row;
410     mumps->jcn = col;
411     mumps->nnz = nz;
412   }
413   PetscFunctionReturn(0);
414 }
415 
416 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
417 {
418   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
419   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
420   PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
421   PetscInt       bs;
422   PetscErrorCode ierr;
423   PetscMUMPSInt  *row,*col;
424 
425   PetscFunctionBegin;
426   ierr       = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
427   M          = A->rmap->N/bs;
428   mumps->val = aa->a;
429   if (reuse == MAT_INITIAL_MATRIX) {
430     ai   = aa->i; aj = aa->j;
431     nz   = bs2*aa->nz;
432     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
433     for (i=0; i<M; i++) {
434       ajj = aj + ai[i];
435       rnz = ai[i+1] - ai[i];
436       for (k=0; k<rnz; k++) {
437         for (j=0; j<bs; j++) {
438           for (m=0; m<bs; m++) {
439             ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
440             ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
441             idx++;
442           }
443         }
444       }
445     }
446     mumps->irn = row;
447     mumps->jcn = col;
448     mumps->nnz = nz;
449   }
450   PetscFunctionReturn(0);
451 }
452 
453 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
454 {
455   const PetscInt *ai, *aj,*ajj;
456   PetscInt        bs;
457   PetscInt64      nz,rnz,i,j,k,m;
458   PetscErrorCode  ierr;
459   PetscMUMPSInt   *row,*col;
460   PetscScalar     *val;
461   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
462   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
463 #if defined(PETSC_USE_COMPLEX)
464   PetscBool       hermitian;
465 #endif
466 
467   PetscFunctionBegin;
468 #if defined(PETSC_USE_COMPLEX)
469   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
470   PetscCheckFalse(hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
471 #endif
472   ai   = aa->i;
473   aj   = aa->j;
474   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
475   if (reuse == MAT_INITIAL_MATRIX) {
476     nz   = aa->nz;
477     ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
478     if (bs>1) {
479       ierr       = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
480       mumps->val = mumps->val_alloc;
481     } else {
482       mumps->val = aa->a;
483     }
484     mumps->irn = row;
485     mumps->jcn = col;
486   } else {
487     if (bs == 1) mumps->val = aa->a;
488     row = mumps->irn;
489     col = mumps->jcn;
490   }
491   val = mumps->val;
492 
493   nz = 0;
494   if (bs>1) {
495     for (i=0; i<mbs; i++) {
496       rnz = ai[i+1] - ai[i];
497       ajj = aj + ai[i];
498       for (j=0; j<rnz; j++) {
499         for (k=0; k<bs; k++) {
500           for (m=0; m<bs; m++) {
501             if (ajj[j]>i || k>=m) {
502               if (reuse == MAT_INITIAL_MATRIX) {
503                 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
504                 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
505               }
506               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
507             }
508           }
509         }
510       }
511     }
512   } else if (reuse == MAT_INITIAL_MATRIX) {
513     for (i=0; i<mbs; i++) {
514       rnz = ai[i+1] - ai[i];
515       ajj = aj + ai[i];
516       for (j=0; j<rnz; j++) {
517         ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
518         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
519         nz++;
520       }
521     }
522     PetscCheckFalse(nz != aa->nz,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
523   }
524   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
525   PetscFunctionReturn(0);
526 }
527 
528 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
529 {
530   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
531   PetscInt64        nz,rnz,i,j;
532   const PetscScalar *av,*v1;
533   PetscScalar       *val;
534   PetscErrorCode    ierr;
535   PetscMUMPSInt     *row,*col;
536   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
537   PetscBool         missing;
538 #if defined(PETSC_USE_COMPLEX)
539   PetscBool         hermitian;
540 #endif
541 
542   PetscFunctionBegin;
543 #if defined(PETSC_USE_COMPLEX)
544   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
545   PetscCheckFalse(hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
546 #endif
547   ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
548   ai    = aa->i; aj = aa->j;
549   adiag = aa->diag;
550   ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
551   if (reuse == MAT_INITIAL_MATRIX) {
552     /* count nz in the upper triangular part of A */
553     nz = 0;
554     if (missing) {
555       for (i=0; i<M; i++) {
556         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
557           for (j=ai[i];j<ai[i+1];j++) {
558             if (aj[j] < i) continue;
559             nz++;
560           }
561         } else {
562           nz += ai[i+1] - adiag[i];
563         }
564       }
565     } else {
566       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
567     }
568     ierr       = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
569     ierr       = PetscMalloc1(nz,&val);CHKERRQ(ierr);
570     mumps->nnz = nz;
571     mumps->irn = row;
572     mumps->jcn = col;
573     mumps->val = mumps->val_alloc = val;
574 
575     nz = 0;
576     if (missing) {
577       for (i=0; i<M; i++) {
578         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
579           for (j=ai[i];j<ai[i+1];j++) {
580             if (aj[j] < i) continue;
581             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
582             ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
583             val[nz] = av[j];
584             nz++;
585           }
586         } else {
587           rnz = ai[i+1] - adiag[i];
588           ajj = aj + adiag[i];
589           v1  = av + adiag[i];
590           for (j=0; j<rnz; j++) {
591             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
592             ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
593             val[nz++] = v1[j];
594           }
595         }
596       }
597     } else {
598       for (i=0; i<M; i++) {
599         rnz = ai[i+1] - adiag[i];
600         ajj = aj + adiag[i];
601         v1  = av + adiag[i];
602         for (j=0; j<rnz; j++) {
603           ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
604           ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
605           val[nz++] = v1[j];
606         }
607       }
608     }
609   } else {
610     nz = 0;
611     val = mumps->val;
612     if (missing) {
613       for (i=0; i <M; i++) {
614         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
615           for (j=ai[i];j<ai[i+1];j++) {
616             if (aj[j] < i) continue;
617             val[nz++] = av[j];
618           }
619         } else {
620           rnz = ai[i+1] - adiag[i];
621           v1  = av + adiag[i];
622           for (j=0; j<rnz; j++) {
623             val[nz++] = v1[j];
624           }
625         }
626       }
627     } else {
628       for (i=0; i <M; i++) {
629         rnz = ai[i+1] - adiag[i];
630         v1  = av + adiag[i];
631         for (j=0; j<rnz; j++) {
632           val[nz++] = v1[j];
633         }
634       }
635     }
636   }
637   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
638   PetscFunctionReturn(0);
639 }
640 
641 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
642 {
643   PetscErrorCode    ierr;
644   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
645   PetscInt          bs;
646   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
647   PetscMUMPSInt     *row,*col;
648   const PetscScalar *av,*bv,*v1,*v2;
649   PetscScalar       *val;
650   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
651   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
652   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
653   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
654 #if defined(PETSC_USE_COMPLEX)
655   PetscBool         hermitian;
656 #endif
657 
658   PetscFunctionBegin;
659 #if defined(PETSC_USE_COMPLEX)
660   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
661   PetscCheckFalse(hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
662 #endif
663   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
664   rstart = A->rmap->rstart;
665   ai = aa->i;
666   aj = aa->j;
667   bi = bb->i;
668   bj = bb->j;
669   av = aa->a;
670   bv = bb->a;
671 
672   garray = mat->garray;
673 
674   if (reuse == MAT_INITIAL_MATRIX) {
675     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
676     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
677     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
678     /* cannot determine the exact mumps->nnz yet because of the SBAIJ format */
679     mumps->irn = row;
680     mumps->jcn = col;
681     mumps->val = mumps->val_alloc = val;
682   } else {
683     val = mumps->val;
684   }
685 
686   jj = 0; irow = rstart;
687   for (i=0; i<mbs; i++) {
688     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
689     countA = ai[i+1] - ai[i];
690     countB = bi[i+1] - bi[i];
691     bjj    = bj + bi[i];
692     v1     = av + ai[i]*bs2;
693     v2     = bv + bi[i]*bs2;
694 
695     if (bs>1) {
696       /* A-part */
697       for (j=0; j<countA; j++) {
698         for (k=0; k<bs; k++) {
699           for (m=0; m<bs; m++) {
700             if (rstart + ajj[j]*bs>irow || k>=m) {
701               if (reuse == MAT_INITIAL_MATRIX) {
702                 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
703                 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
704               }
705               val[jj++] = v1[j*bs2 + m + k*bs];
706             }
707           }
708         }
709       }
710 
711       /* B-part */
712       for (j=0; j < countB; j++) {
713         for (k=0; k<bs; k++) {
714           for (m=0; m<bs; m++) {
715             if (reuse == MAT_INITIAL_MATRIX) {
716               ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
717               ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
718             }
719             val[jj++] = v2[j*bs2 + m + k*bs];
720           }
721         }
722       }
723     } else {
724       /* A-part */
725       for (j=0; j<countA; j++) {
726         if (reuse == MAT_INITIAL_MATRIX) {
727           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
728           ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
729         }
730         val[jj++] = v1[j];
731       }
732 
733       /* B-part */
734       for (j=0; j < countB; j++) {
735         if (reuse == MAT_INITIAL_MATRIX) {
736           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
737           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
738         }
739         val[jj++] = v2[j];
740       }
741     }
742     irow+=bs;
743   }
744   mumps->nnz = jj;
745   PetscFunctionReturn(0);
746 }
747 
748 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
749 {
750   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
751   PetscErrorCode    ierr;
752   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
753   PetscMUMPSInt     *row,*col;
754   const PetscScalar *av, *bv,*v1,*v2;
755   PetscScalar       *val;
756   Mat               Ad,Ao;
757   Mat_SeqAIJ        *aa;
758   Mat_SeqAIJ        *bb;
759 
760   PetscFunctionBegin;
761   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
762   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
763   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
764 
765   aa = (Mat_SeqAIJ*)(Ad)->data;
766   bb = (Mat_SeqAIJ*)(Ao)->data;
767   ai = aa->i;
768   aj = aa->j;
769   bi = bb->i;
770   bj = bb->j;
771 
772   rstart = A->rmap->rstart;
773 
774   if (reuse == MAT_INITIAL_MATRIX) {
775     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
776     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
777     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
778     mumps->nnz = nz;
779     mumps->irn = row;
780     mumps->jcn = col;
781     mumps->val = mumps->val_alloc = val;
782   } else {
783     val = mumps->val;
784   }
785 
786   jj = 0; irow = rstart;
787   for (i=0; i<m; i++) {
788     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
789     countA = ai[i+1] - ai[i];
790     countB = bi[i+1] - bi[i];
791     bjj    = bj + bi[i];
792     v1     = av + ai[i];
793     v2     = bv + bi[i];
794 
795     /* A-part */
796     for (j=0; j<countA; j++) {
797       if (reuse == MAT_INITIAL_MATRIX) {
798         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
799         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
800       }
801       val[jj++] = v1[j];
802     }
803 
804     /* B-part */
805     for (j=0; j < countB; j++) {
806       if (reuse == MAT_INITIAL_MATRIX) {
807         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
808         ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
809       }
810       val[jj++] = v2[j];
811     }
812     irow++;
813   }
814   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
815   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
816   PetscFunctionReturn(0);
817 }
818 
819 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
820 {
821   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
822   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
823   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
824   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
825   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
826   const PetscInt    bs2=mat->bs2;
827   PetscErrorCode    ierr;
828   PetscInt          bs;
829   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
830   PetscMUMPSInt     *row,*col;
831   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
832   PetscScalar       *val;
833 
834   PetscFunctionBegin;
835   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
836   if (reuse == MAT_INITIAL_MATRIX) {
837     nz   = bs2*(aa->nz + bb->nz);
838     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
839     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
840     mumps->nnz = nz;
841     mumps->irn = row;
842     mumps->jcn = col;
843     mumps->val = mumps->val_alloc = val;
844   } else {
845     val = mumps->val;
846   }
847 
848   jj = 0; irow = rstart;
849   for (i=0; i<mbs; i++) {
850     countA = ai[i+1] - ai[i];
851     countB = bi[i+1] - bi[i];
852     ajj    = aj + ai[i];
853     bjj    = bj + bi[i];
854     v1     = av + bs2*ai[i];
855     v2     = bv + bs2*bi[i];
856 
857     idx = 0;
858     /* A-part */
859     for (k=0; k<countA; k++) {
860       for (j=0; j<bs; j++) {
861         for (n=0; n<bs; n++) {
862           if (reuse == MAT_INITIAL_MATRIX) {
863             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
864             ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
865           }
866           val[jj++] = v1[idx++];
867         }
868       }
869     }
870 
871     idx = 0;
872     /* B-part */
873     for (k=0; k<countB; k++) {
874       for (j=0; j<bs; j++) {
875         for (n=0; n<bs; n++) {
876           if (reuse == MAT_INITIAL_MATRIX) {
877             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
878             ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
879           }
880           val[jj++] = v2[idx++];
881         }
882       }
883     }
884     irow += bs;
885   }
886   PetscFunctionReturn(0);
887 }
888 
889 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
890 {
891   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
892   PetscErrorCode    ierr;
893   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
894   PetscMUMPSInt     *row,*col;
895   const PetscScalar *av, *bv,*v1,*v2;
896   PetscScalar       *val;
897   Mat               Ad,Ao;
898   Mat_SeqAIJ        *aa;
899   Mat_SeqAIJ        *bb;
900 #if defined(PETSC_USE_COMPLEX)
901   PetscBool         hermitian;
902 #endif
903 
904   PetscFunctionBegin;
905 #if defined(PETSC_USE_COMPLEX)
906   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
907   PetscCheckFalse(hermitian,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Cholesky");
908 #endif
909   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
910   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
911   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
912 
913   aa    = (Mat_SeqAIJ*)(Ad)->data;
914   bb    = (Mat_SeqAIJ*)(Ao)->data;
915   ai    = aa->i;
916   aj    = aa->j;
917   adiag = aa->diag;
918   bi    = bb->i;
919   bj    = bb->j;
920 
921   rstart = A->rmap->rstart;
922 
923   if (reuse == MAT_INITIAL_MATRIX) {
924     nza = 0;    /* num of upper triangular entries in the diagonal block Ad, including diagonals */
925     nzb = 0;    /* num of upper triangular entries in the off-diagonal block Ao */
926     for (i=0; i<m; i++) {
927       nza   += (ai[i+1] - adiag[i]);
928       countB = bi[i+1] - bi[i];
929       bjj    = bj + bi[i];
930       for (j=0; j<countB; j++) {
931         if (garray[bjj[j]] > rstart) nzb++;
932       }
933     }
934 
935     nz   = nza + nzb; /* total nz of upper triangular part of mat */
936     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
937     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
938     mumps->nnz = nz;
939     mumps->irn = row;
940     mumps->jcn = col;
941     mumps->val = mumps->val_alloc = val;
942   } else {
943     val = mumps->val;
944   }
945 
946   jj = 0; irow = rstart;
947   for (i=0; i<m; i++) {
948     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
949     v1     = av + adiag[i];
950     countA = ai[i+1] - adiag[i];
951     countB = bi[i+1] - bi[i];
952     bjj    = bj + bi[i];
953     v2     = bv + bi[i];
954 
955     /* A-part */
956     for (j=0; j<countA; j++) {
957       if (reuse == MAT_INITIAL_MATRIX) {
958         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
959         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
960       }
961       val[jj++] = v1[j];
962     }
963 
964     /* B-part */
965     for (j=0; j < countB; j++) {
966       if (garray[bjj[j]] > rstart) {
967         if (reuse == MAT_INITIAL_MATRIX) {
968           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
969           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
970         }
971         val[jj++] = v2[j];
972       }
973     }
974     irow++;
975   }
976   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
977   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 PetscErrorCode MatDestroy_MUMPS(Mat A)
982 {
983   PetscErrorCode ierr;
984   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
985 
986   PetscFunctionBegin;
987   ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
988   ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
989   ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
990   ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
991   ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
992   ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
993   ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
994   ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
995   ierr = PetscFree(mumps->info);CHKERRQ(ierr);
996   ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
997   mumps->id.job = JOB_END;
998   PetscMUMPS_c(mumps);
999   PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1000 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1001   if (mumps->use_petsc_omp_support) {
1002     ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr);
1003     ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
1004     ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr);
1005   }
1006 #endif
1007   ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
1008   ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
1009   ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
1010   ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
1011   ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr);
1012   if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);}
1013   ierr = PetscFree(A->data);CHKERRQ(ierr);
1014 
1015   /* clear composed functions */
1016   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
1017   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
1018   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
1019   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
1020   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
1021   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
1022   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
1023   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
1024   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
1025   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
1026   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
1027   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
1028   ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
1029   PetscFunctionReturn(0);
1030 }
1031 
1032 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
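/* Illustrative example: without the PETSc OpenMP support path (ompsize == 1), a rank owning rows
   rstart..rstart+m-1 gets irhs_loc[] = {rstart+1,...,rstart+m} (1-based) and id.rhs_loc points
   directly at the caller's array; no data is moved. */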
1033 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
1034 {
1035   PetscErrorCode     ierr;
1036   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1037   const PetscMPIInt  ompsize=mumps->omp_comm_size;
1038   PetscInt           i,m,M,rstart;
1039 
1040   PetscFunctionBegin;
1041   ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
1042   ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
1043   PetscCheckFalse(M > PETSC_MUMPS_INT_MAX,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
1044   if (ompsize == 1) {
1045     if (!mumps->irhs_loc) {
1046       mumps->nloc_rhs = m;
1047       ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
1048       ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
1049       for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
1050     }
1051     mumps->id.rhs_loc = (MumpsScalar*)array;
1052   } else {
1053   #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1054     const PetscInt  *ranges;
1055     PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
1056     MPI_Group       petsc_group,omp_group;
1057     PetscScalar     *recvbuf=NULL;
1058 
1059     if (mumps->is_omp_master) {
1060       /* Lazily initialize the omp stuff for distributed rhs */
1061       if (!mumps->irhs_loc) {
1062         ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
1063         ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
1064         ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr);
1065         ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr);
1066         for (j=0; j<ompsize; j++) omp_ranks[j] = j;
1067         ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr);
1068 
1069         /* Populate mumps->irhs_loc[], rhs_nrow[] */
1070         mumps->nloc_rhs = 0;
1071         ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
1072         for (j=0; j<ompsize; j++) {
1073           mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
1074           mumps->nloc_rhs   += mumps->rhs_nrow[j];
1075         }
1076         ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
1077         for (j=k=0; j<ompsize; j++) {
1078           for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
1079         }
1080 
1081         ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
1082         ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr);
1083         ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr);
1084       }
1085 
1086       /* Realloc buffers when the current nrhs is bigger than any we have seen so far */
1087       if (nrhs > mumps->max_nrhs) {
1088         ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
1089         ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
1090         mumps->max_nrhs = nrhs;
1091       }
1092 
1093       /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
1094       for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
1095       mumps->rhs_disps[0] = 0;
1096       for (j=1; j<ompsize; j++) {
1097         mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
1098         PetscCheckFalse(mumps->rhs_disps[j] < 0,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
1099       }
1100       recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
1101     }
1102 
1103     ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
1104     ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr);
1105 
1106     if (mumps->is_omp_master) {
1107       if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
1108         PetscScalar *dst,*dstbase = mumps->rhs_loc;
1109         for (j=0; j<ompsize; j++) {
1110           const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
1111           dst = dstbase;
1112           for (i=0; i<nrhs; i++) {
1113             ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
1114             src += mumps->rhs_nrow[j];
1115             dst += mumps->nloc_rhs;
1116           }
1117           dstbase += mumps->rhs_nrow[j];
1118         }
1119       }
1120       mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
1121     }
1122   #endif /* PETSC_HAVE_OPENMP_SUPPORT */
1123   }
1124   mumps->id.nrhs     = nrhs;
1125   mumps->id.nloc_rhs = mumps->nloc_rhs;
1126   mumps->id.lrhs_loc = mumps->nloc_rhs;
1127   mumps->id.irhs_loc = mumps->irhs_loc;
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
1132 {
1133   Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
1134   const PetscScalar  *rarray = NULL;
1135   PetscScalar        *array;
1136   IS                 is_iden,is_petsc;
1137   PetscErrorCode     ierr;
1138   PetscInt           i;
1139   PetscBool          second_solve = PETSC_FALSE;
1140   static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;
1141 
1142   PetscFunctionBegin;
1143   ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
1144   ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);
1145 
1146   if (A->factorerrortype) {
1147     ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1148     ierr = VecSetInf(x);CHKERRQ(ierr);
1149     PetscFunctionReturn(0);
1150   }
1151 
1152   mumps->id.nrhs = 1;
1153   if (mumps->petsc_size > 1) {
1154     if (mumps->ICNTL20 == 10) {
1155       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1156       ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
1157       ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
1158     } else {
1159       mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential rhs vector */
1160       ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1161       ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1162       if (!mumps->myid) {
1163         ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
1164         mumps->id.rhs = (MumpsScalar*)array;
1165       }
1166     }
1167   } else {  /* petsc_size == 1 */
1168     mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1169     ierr = VecCopy(b,x);CHKERRQ(ierr);
1170     ierr = VecGetArray(x,&array);CHKERRQ(ierr);
1171     mumps->id.rhs = (MumpsScalar*)array;
1172   }
1173 
1174   /*
1175      handle condensation step of Schur complement (if any)
1176      We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
1177      According to the MUMPS (5.0.0) manual, any other value could be harmful during the factorization phase.
1178      Unless the user provides a valid value for ICNTL(26), the MatSolve and MatMatSolve routines solve the full system.
1179      This requires an extra call to PetscMUMPS_c and the computation of the factors for S
1180   */
1181   if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1182     PetscCheckFalse(mumps->petsc_size > 1,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1183     second_solve = PETSC_TRUE;
1184     ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
1185   }
1186   /* solve phase */
1187   /*-------------*/
1188   mumps->id.job = JOB_SOLVE;
1189   PetscMUMPS_c(mumps);
1190   PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1191 
1192   /* handle expansion step of Schur complement (if any) */
1193   if (second_solve) {
1194     ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
1195   }
1196 
1197   if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
1198     if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
1199       /* when id.ICNTL(9) changes, the contents of isol_loc may change (though not its size, lsol_loc), so recreate scat_sol */
1200       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1201     }
1202     if (!mumps->scat_sol) { /* create scatter scat_sol */
1203       PetscInt *isol2_loc=NULL;
1204       ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
1205       ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
1206       for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
1207       ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr);  /* to */
1208       ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
1209       ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
1210       ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
1211       mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
1212     }
1213 
1214     ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1215     ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1216   }
1217 
1218   if (mumps->petsc_size > 1) {
1219     if (mumps->ICNTL20 == 10) {
1220       ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
1221     } else if (!mumps->myid) {
1222       ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
1223     }
1224   } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}
1225 
1226   ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1231 {
1232   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   mumps->id.ICNTL(9) = 0;
1237   ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
1238   mumps->id.ICNTL(9) = 1;
1239   PetscFunctionReturn(0);
1240 }
1241 
1242 PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
1243 {
1244   PetscErrorCode    ierr;
1245   Mat               Bt = NULL;
1246   PetscBool         denseX,denseB,flg,flgT;
1247   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1248   PetscInt          i,nrhs,M;
1249   PetscScalar       *array;
1250   const PetscScalar *rbray;
1251   PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
1252   PetscMUMPSInt     *isol_loc,*isol_loc_save;
1253   PetscScalar       *bray,*sol_loc,*sol_loc_save;
1254   IS                is_to,is_from;
1255   PetscInt          k,proc,j,m,myrstart;
1256   const PetscInt    *rstart;
1257   Vec               v_mpi,msol_loc;
1258   VecScatter        scat_sol;
1259   Vec               b_seq;
1260   VecScatter        scat_rhs;
1261   PetscScalar       *aa;
1262   PetscInt          spnr,*ia,*ja;
1263   Mat_MPIAIJ        *b = NULL;
1264 
1265   PetscFunctionBegin;
1266   ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
1267   PetscCheckFalse(!denseX,PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
1268 
1269   ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
1270   if (denseB) {
1271     PetscCheckFalse(B->rmap->n != X->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
1272     mumps->id.ICNTL(20)= 0; /* dense RHS */
1273   } else { /* sparse B */
1274     PetscCheckFalse(X == B,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
1275     ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
1276     if (flgT) { /* input B is the transpose of the actual RHS matrix,
1277                  because MUMPS requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
1278       ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
1279     } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
1280     mumps->id.ICNTL(20)= 1; /* sparse RHS */
1281   }
1282 
1283   ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
1284   mumps->id.nrhs = nrhs;
1285   mumps->id.lrhs = M;
1286   mumps->id.rhs  = NULL;
1287 
1288   if (mumps->petsc_size == 1) {
1289     PetscScalar *aa;
1290     PetscInt    spnr,*ia,*ja;
1291     PetscBool   second_solve = PETSC_FALSE;
1292 
1293     ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
1294     mumps->id.rhs = (MumpsScalar*)array;
1295 
1296     if (denseB) {
1297       /* copy B to X */
1298       ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
1299       ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
1300       ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
1301     } else { /* sparse B */
1302       ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
1303       ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1304       PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1305       ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
1306       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1307     }
1308     /* handle condensation step of Schur complement (if any) */
1309     if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
1310       second_solve = PETSC_TRUE;
1311       ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
1312     }
1313     /* solve phase */
1314     /*-------------*/
1315     mumps->id.job = JOB_SOLVE;
1316     PetscMUMPS_c(mumps);
1317     PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1318 
1319     /* handle expansion step of Schur complement (if any) */
1320     if (second_solve) {
1321       ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
1322     }
1323     if (!denseB) { /* sparse B */
1324       ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
1325       ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1326       PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1327     }
1328     ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
1329     PetscFunctionReturn(0);
1330   }
1331 
1332   /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
1333   PetscCheckFalse(mumps->petsc_size > 1 && mumps->id.ICNTL(19),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
1334 
1335   /* create msol_loc to hold mumps local solution */
1336   isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
1337   sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;
1338 
1339   lsol_loc  = mumps->id.lsol_loc;
1340   nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
1341   ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
1342   mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
1343   mumps->id.isol_loc = isol_loc;
1344 
1345   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);
1346 
1347   if (denseB) {
1348     if (mumps->ICNTL20 == 10) {
1349       mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
1350       ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
1351       ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
1352       ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
1353       ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
1354       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
1355     } else {
1356       mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
1357       /* TODO: Because of the non-contiguous indices, the vecscatter scat_rhs created below cannot be realized as a
1358         simple MPI_Gather, resulting in very inefficient communication. An optimization would be to use
1359         VecScatterCreateToZero to gather B to rank 0, then re-arrange B into the desired order on rank 0 (a local operation).
1360       */
1361 
1362       /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
1363       /* wrap dense rhs matrix B into a vector v_mpi */
1364       ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
1365       ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
1366       ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
1367       ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);
1368 
1369       /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
1370       if (!mumps->myid) {
1371         PetscInt *idx;
1372         /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
1373         ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
1374         ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
1375         k = 0;
1376         for (proc=0; proc<mumps->petsc_size; proc++) {
1377           for (j=0; j<nrhs; j++) {
1378             for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
1379           }
1380         }
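        /* For illustration only (added sketch, not part of the original comments): with 2 ranks owning rows [0,2) and
           [2,4) of B and nrhs=2 (so M=4), v_mpi holds rank 0's local columns first, i.e. entries (0,0),(1,0),(0,1),(1,1),
           followed by rank 1's (2,0),(3,0),(2,1),(3,1). The loops above then produce idx = {0,1,4,5,2,3,6,7}, i.e.
           idx[k] is the column-major position j*M+i in the centralized b_seq of the k-th entry of v_mpi. */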
1381 
1382         ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
1383         ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
1384         ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
1385       } else {
1386         ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
1387         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
1388         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
1389       }
1390       ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
1391       ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1392       ierr = ISDestroy(&is_to);CHKERRQ(ierr);
1393       ierr = ISDestroy(&is_from);CHKERRQ(ierr);
1394       ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1395 
1396       if (!mumps->myid) { /* define rhs on the host */
1397         ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
1398         mumps->id.rhs = (MumpsScalar*)bray;
1399         ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
1400       }
1401     }
1402   } else { /* sparse B */
1403     b = (Mat_MPIAIJ*)Bt->data;
1404 
1405     /* wrap dense X into a vector v_mpi */
1406     ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
1407     ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
1408     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
1409     ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);
1410 
1411     if (!mumps->myid) {
1412       ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
1413       ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1414       PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
1415       ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
1416       mumps->id.rhs_sparse  = (MumpsScalar*)aa;
1417     } else {
1418       mumps->id.irhs_ptr    = NULL;
1419       mumps->id.irhs_sparse = NULL;
1420       mumps->id.nz_rhs      = 0;
1421       mumps->id.rhs_sparse  = NULL;
1422     }
1423   }
1424 
1425   /* solve phase */
1426   /*-------------*/
1427   mumps->id.job = JOB_SOLVE;
1428   PetscMUMPS_c(mumps);
1429   PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
1430 
1431   /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
1432   ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
1433   ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);
1434 
1435   /* create scatter scat_sol */
1436   ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
1437   /* iidx: index for scatter mumps solution to petsc X */
1438 
1439   ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
1440   ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
1441   for (i=0; i<lsol_loc; i++) {
1442     isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */
1443 
1444     for (proc=0; proc<mumps->petsc_size; proc++) {
1445       if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
1446         myrstart = rstart[proc];
1447         k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
1448         iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
1449         m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
1450         break;
1451       }
1452     }
1453 
1454     for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
1455   }
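  /* For illustration only (added sketch): idxx[i+j*lsol_loc] is the position in v_mpi that receives entry isol_loc[i]
     of the j-th solution column. v_mpi wraps X's local arrays, so a rank owning rows [myrstart,myrstart+m) of X
     occupies positions [myrstart*nrhs,(myrstart+m)*nrhs) of v_mpi, stored column by column; column j of that block
     thus starts at myrstart*nrhs + j*m, which is exactly the iidx + j*m computed in the loop above. */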
1456   ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
1457   ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
1458   ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1459   ierr = ISDestroy(&is_from);CHKERRQ(ierr);
1460   ierr = ISDestroy(&is_to);CHKERRQ(ierr);
1461   ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1462   ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
1463 
1464   /* free spaces */
1465   mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
1466   mumps->id.isol_loc = isol_loc_save;
1467 
1468   ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
1469   ierr = PetscFree(idxx);CHKERRQ(ierr);
1470   ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
1471   ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
1472   if (!denseB) {
1473     if (!mumps->myid) {
1474       b = (Mat_MPIAIJ*)Bt->data;
1475       ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
1476       ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
1477       PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
1478     }
1479   } else {
1480     if (mumps->ICNTL20 == 0) {
1481       ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
1482       ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
1483     }
1484   }
1485   ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
1486   ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
1487   PetscFunctionReturn(0);
1488 }
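/* Usage sketch (illustrative, not from this file): the routine above is reached through the public API
     MatMatSolve(F,B,X);
   when F was obtained from MATSOLVERMUMPS and B, X are MATDENSE matrices whose rows are distributed like A. */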
1489 
1490 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1491 {
1492   PetscErrorCode ierr;
1493   PetscBool      flg;
1494   Mat            B;
1495 
1496   PetscFunctionBegin;
1497   ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
1498   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be a MATAIJ matrix");
1499 
1500   /* Create B=Bt^T that uses Bt's data structure */
1501   ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
1502 
1503   ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
1504   ierr = MatDestroy(&B);CHKERRQ(ierr);
1505   PetscFunctionReturn(0);
1506 }
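/* Usage sketch (illustrative, not from this file): a caller typically provides the right-hand sides by rows,
     MatMatTransposeSolve(F,Bt,X);
   with Bt a (seq/mpi) AIJ matrix holding B^T and X a dense matrix; the wrapper above forwards the call to
   MatMatSolve_MUMPS() through a MATTRANSPOSEMAT shell so that MUMPS sees compressed-column sparse right-hand sides. */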
1507 
1508 #if !defined(PETSC_USE_COMPLEX)
1509 /*
1510   input:
1511    F:        numeric factor
1512   output:
1513    nneg:     total number of negative pivots
1514    nzero:    total number of zero pivots
1515    npos:     (global dimension of F) - nneg - nzero
1516 */
1517 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1518 {
1519   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1520   PetscErrorCode ierr;
1521   PetscMPIInt    size;
1522 
1523   PetscFunctionBegin;
1524   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr);
1525   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1526   PetscCheckFalse(size > 1 && mumps->id.ICNTL(13) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set to 1 for correct global matrix inertia",mumps->id.ICNTL(13));
1527 
1528   if (nneg) *nneg = mumps->id.INFOG(12);
1529   if (nzero || npos) {
1530     PetscCheckFalse(mumps->id.ICNTL(24) != 1,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
1531     if (nzero) *nzero = mumps->id.INFOG(28);
1532     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1533   }
1534   PetscFunctionReturn(0);
1535 }
1536 #endif
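/* Usage sketch (illustrative, not from this file): with a Cholesky factorization from MATSOLVERMUMPS and the options
   -mat_mumps_icntl_13 1 -mat_mumps_icntl_24 1, the inertia can be queried as
     PetscInt nneg,nzero,npos;
     MatGetInertia(F,&nneg,&nzero,&npos);
   which dispatches to MatGetInertia_SBAIJMUMPS() above (real scalars only). */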
1537 
1538 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1539 {
1540   PetscErrorCode ierr;
1541   PetscInt       i,nreqs;
1542   PetscMUMPSInt  *irn,*jcn;
1543   PetscMPIInt    count;
1544   PetscInt64     totnnz,remain;
1545   const PetscInt osize=mumps->omp_comm_size;
1546   PetscScalar    *val;
1547 
1548   PetscFunctionBegin;
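  /* When -mat_mumps_use_omp_threads is not used, omp_comm_size == 1 and this routine is a no-op. Otherwise the omp
     master collects irn/jcn/val from its slaves on MAT_INITIAL_MATRIX, and only re-gathers val on MAT_REUSE_MATRIX
     since the nonzero pattern is unchanged. */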
1549   if (osize > 1) {
1550     if (reuse == MAT_INITIAL_MATRIX) {
1551       /* master first gathers counts of nonzeros to receive */
1552       if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
1553       ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr);
1554 
1555       /* Then each computes number of send/recvs */
1556       if (mumps->is_omp_master) {
1557         /* Start from 1 since self communication is not done through MPI */
1558         nreqs = 0;
1559         for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1560       } else {
1561         nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
1562       }
1563       ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val separately */
1564 
1565       /* The following code does a very simple thing: the omp_master rank gathers irn/jcn/val from the others.
1566          MPI_Gatherv would be enough if it supported big counts > 2^31-1. Since it does not, and mumps->nnz
1567          might even be a prime number > 2^31-1, we have to slice the messages. Since omp_comm_size
1568          is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
1569        */
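      /* Illustrative numbers (added): if a slave holds 5,000,000,000 nonzeros, the loops below exchange them in three
         slices of 2,147,483,647, 2,147,483,647 and 705,032,706 entries, each within the MPI 'int' count limit. */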
1570       nreqs = 0; /* counter for actual send/recvs */
1571       if (mumps->is_omp_master) {
1572         for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
1573         ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
1574         ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);
1575 
1576         /* Self communication */
1577         ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
1578         ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
1579         ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);
1580 
1581         /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
1582         ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
1583         ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
1584         mumps->nnz = totnnz;
1585         mumps->irn = irn;
1586         mumps->jcn = jcn;
1587         mumps->val = mumps->val_alloc = val;
1588 
1589         irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
1590         jcn += mumps->recvcount[0];
1591         val += mumps->recvcount[0];
1592 
1593         /* Remote communication */
1594         for (i=1; i<osize; i++) {
1595           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1596           remain = mumps->recvcount[i] - count;
1597           while (count>0) {
1598             ierr    = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1599             ierr    = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1600             ierr    = MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1601             irn    += count;
1602             jcn    += count;
1603             val    += count;
1604             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1605             remain -= count;
1606           }
1607         }
1608       } else {
1609         irn    = mumps->irn;
1610         jcn    = mumps->jcn;
1611         val    = mumps->val;
1612         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1613         remain = mumps->nnz - count;
1614         while (count>0) {
1615           ierr    = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1616           ierr    = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1617           ierr    = MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1618           irn    += count;
1619           jcn    += count;
1620           val    += count;
1621           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1622           remain -= count;
1623         }
1624       }
1625     } else {
1626       nreqs = 0;
1627       if (mumps->is_omp_master) {
1628         val = mumps->val + mumps->recvcount[0];
1629         for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
1630           count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
1631           remain = mumps->recvcount[i] - count;
1632           while (count>0) {
1633             ierr    = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1634             val    += count;
1635             count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1636             remain -= count;
1637           }
1638         }
1639       } else {
1640         val    = mumps->val;
1641         count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
1642         remain = mumps->nnz - count;
1643         while (count>0) {
1644           ierr    = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
1645           val    += count;
1646           count   = PetscMin(remain,PETSC_MPI_INT_MAX);
1647           remain -= count;
1648         }
1649       }
1650     }
1651     ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
1652     mumps->tag++; /* It is fine for the above send/recvs to share one MPI tag */
1653   }
1654   PetscFunctionReturn(0);
1655 }
1656 
1657 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1658 {
1659   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1660   PetscErrorCode ierr;
1661   PetscBool      isMPIAIJ;
1662 
1663   PetscFunctionBegin;
1664   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) { /* INFOG(1)=-16 with INFO(2)=0 signals an empty matrix, which is not an error */
1665     if (mumps->id.INFOG(1) == -6) {
1666       ierr = PetscInfo(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1667     }
1668     ierr = PetscInfo(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1669     PetscFunctionReturn(0);
1670   }
1671 
1672   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
1673   ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);
1674 
1675   /* numerical factorization phase */
1676   /*-------------------------------*/
1677   mumps->id.job = JOB_FACTNUMERIC;
1678   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1679     if (!mumps->myid) {
1680       mumps->id.a = (MumpsScalar*)mumps->val;
1681     }
1682   } else {
1683     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1684   }
1685   PetscMUMPS_c(mumps);
1686   if (mumps->id.INFOG(1) < 0) {
1687     if (A->erroriffailure) {
1688       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1689     } else {
1690       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1691         ierr = PetscInfo(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1692         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1693       } else if (mumps->id.INFOG(1) == -13) {
1694         ierr = PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1695         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1696       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1697         ierr = PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1698         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1699       } else {
1700         ierr = PetscInfo(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1701         F->factorerrortype = MAT_FACTOR_OTHER;
1702       }
1703     }
1704   }
1705   PetscCheckFalse(!mumps->myid && mumps->id.ICNTL(16) > 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16));
1706 
1707   F->assembled    = PETSC_TRUE;
1708 
1709   if (F->schur) { /* reset Schur status to unfactored */
1710 #if defined(PETSC_HAVE_CUDA)
1711     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1712 #endif
1713     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1714       mumps->id.ICNTL(19) = 2;
1715       ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
1716     }
1717     ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
1718   }
1719 
1720   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1721   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1722 
1723   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1724   if (mumps->petsc_size > 1) {
1725     PetscInt    lsol_loc;
1726     PetscScalar *sol_loc;
1727 
1728     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);
1729 
1730     /* distributed solution; Create x_seq=sol_loc for repeated use */
1731     if (mumps->x_seq) {
1732       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1733       ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
1734       ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
1735     }
1736     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1737     ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
1738     mumps->id.lsol_loc = lsol_loc;
1739     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1740     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
1741   }
1742   ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
1743   PetscFunctionReturn(0);
1744 }
1745 
1746 /* Sets MUMPS options from the options database */
1747 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1748 {
1749   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1750   PetscErrorCode ierr;
1751   PetscMUMPSInt  icntl=0;
1752   PetscInt       info[80],i,ninfo=80;
1753   PetscBool      flg=PETSC_FALSE;
1754 
1755   PetscFunctionBegin;
1756   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
1757   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
1758   if (flg) mumps->id.ICNTL(1) = icntl;
1759   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
1760   if (flg) mumps->id.ICNTL(2) = icntl;
1761   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
1762   if (flg) mumps->id.ICNTL(3) = icntl;
1763 
1764   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
1765   if (flg) mumps->id.ICNTL(4) = icntl;
1766   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */
1767 
1768   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes the matrix to a zero-free diagonal and/or scales it (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
1769   if (flg) mumps->id.ICNTL(6) = icntl;
1770 
1771   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
1772   if (flg) {
1773     PetscCheckFalse(icntl == 1 || icntl < 0 || icntl > 7,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1774     mumps->id.ICNTL(7) = icntl;
1775   }
1776 
1777   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
1778   /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */
1779   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
1780   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
1781   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
1782   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
1783   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
1784   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
1785   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1786     ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
1787     ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
1788   }
1789 
1790   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1791      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1792      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1793      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1794      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1795      In short, we could not use distributed RHS with MPICH until v4.0b1.
1796    */
1797 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1798   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1799 #else
1800   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1801 #endif
1802   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr);
1803   PetscCheckFalse(flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1804 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1805   PetscCheckFalse(flg && mumps->ICNTL20 == 10,PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1806 #endif
1807   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */
1808 
1809   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
1810   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can be allocated per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
1811   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
1812   if (mumps->id.ICNTL(24)) {
1813     mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */
1814   }
1815 
1816   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
1817   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix is present","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
1818   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
1819   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
1820   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
1821   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
1822   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
1823   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
1824   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
1825   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
1826   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
1827   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
1828 
1829   ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
1830   ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
1831   ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
1832   ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
1833   ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
1834   ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);
1835 
1836   ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr);
1837 
1838   ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
1839   if (ninfo) {
1840     PetscCheckFalse(ninfo > 80,PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must be <= 80",ninfo);
1841     ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
1842     mumps->ninfo = ninfo;
1843     for (i=0; i<ninfo; i++) {
1844       PetscCheckFalse(info[i] < 1 || info[i] > 80,PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must be between 1 and 80",info[i]);
1845       mumps->info[i] = info[i];
1846     }
1847   }
1848 
1849   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
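/* Usage sketch (illustrative, not from this file): these options are read from the options database when the symbolic
   factorization is set up, e.g.
     ./app -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_14 50 -mat_mumps_cntl_1 0.01
   with the usual options-prefix rules applying if the matrix carries a prefix. */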
1852 
1853 PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
1854 {
1855   PetscErrorCode ierr;
1856   PetscInt       nthreads=0;
1857 
1858   PetscFunctionBegin;
1859   mumps->petsc_comm = PetscObjectComm((PetscObject)A);
1860   ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr);
1861   ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */
1862 
1863   ierr = PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
1864   if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
1865   ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
1866   if (mumps->use_petsc_omp_support) {
1867 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
1868     ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
1869     ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
1870 #else
1871     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
1872 #endif
1873   } else {
1874     mumps->omp_comm      = PETSC_COMM_SELF;
1875     mumps->mumps_comm    = mumps->petsc_comm;
1876     mumps->is_omp_master = PETSC_TRUE;
1877   }
1878   ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr);
1879   mumps->reqs = NULL;
1880   mumps->tag  = 0;
1881 
1882   /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
1883   if (mumps->mumps_comm != MPI_COMM_NULL) {
1884     ierr =  PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);
1885   }
1886 
1887   mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
1888   mumps->id.job = JOB_INIT;
1889   mumps->id.par = 1;  /* host participates in factorization and solve */
1890   mumps->id.sym = mumps->sym;
1891 
1892   PetscMUMPS_c(mumps);
1893   PetscCheckFalse(mumps->id.INFOG(1) < 0,PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
1894 
1895   /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
1896      For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
1897    */
1898   ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm);CHKERRMPI(ierr);
1899   ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr);
1900 
1901   mumps->scat_rhs = NULL;
1902   mumps->scat_sol = NULL;
1903 
1904   /* set PETSc-MUMPS default options - override MUMPS default */
1905   mumps->id.ICNTL(3) = 0;
1906   mumps->id.ICNTL(4) = 0;
1907   if (mumps->petsc_size == 1) {
1908     mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
1909     mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
1910   } else {
1911     mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
1912     mumps->id.ICNTL(21) = 1;   /* distributed solution */
1913   }
1914 
1915   /* schur */
1916   mumps->id.size_schur    = 0;
1917   mumps->id.listvar_schur = NULL;
1918   mumps->id.schur         = NULL;
1919   mumps->sizeredrhs       = 0;
1920   mumps->schur_sol        = NULL;
1921   mumps->schur_sizesol    = 0;
1922   PetscFunctionReturn(0);
1923 }
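/* Usage sketch (illustrative, not from this file): per the MATSOLVERMUMPS documentation one can run, e.g.,
     mpirun -n 16 ./app -pc_factor_mat_solver_type mumps -mat_mumps_use_omp_threads 4
   so that only 4 "omp master" ranks call MUMPS while each uses 4 OpenMP threads; this requires PETSc configured
   with --with-openmp and hwloc support. */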
1924 
1925 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1926 {
1927   PetscErrorCode ierr;
1928 
1929   PetscFunctionBegin;
1930   if (mumps->id.INFOG(1) < 0) {
1931     if (A->erroriffailure) {
1932       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1933     } else {
1934       if (mumps->id.INFOG(1) == -6) {
1935         ierr = PetscInfo(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1936         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1937       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1938         ierr = PetscInfo(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1939         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1940       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0) { /* INFOG(1)=-16 with INFO(2)=0 signals an empty matrix */
1941         ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
1942       } else {
1943         ierr = PetscInfo(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1944         F->factorerrortype = MAT_FACTOR_OTHER;
1945       }
1946     }
1947   }
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1952 {
1953   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1954   PetscErrorCode ierr;
1955   Vec            b;
1956   const PetscInt M = A->rmap->N;
1957 
1958   PetscFunctionBegin;
1959   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
1960     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
1961     PetscFunctionReturn(0);
1962   }
1963 
1964   /* Set MUMPS options from the options database */
1965   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
1966 
1967   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
1968   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
1969 
1970   /* analysis phase */
1971   /*----------------*/
1972   mumps->id.job = JOB_FACTSYMBOLIC;
1973   mumps->id.n   = M;
1974   switch (mumps->id.ICNTL(18)) {
1975   case 0:  /* centralized assembled matrix input */
1976     if (!mumps->myid) {
1977       mumps->id.nnz = mumps->nnz;
1978       mumps->id.irn = mumps->irn;
1979       mumps->id.jcn = mumps->jcn;
1980       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1981       if (r) {
1982         mumps->id.ICNTL(7) = 1;
1983         if (!mumps->myid) {
1984           const PetscInt *idx;
1985           PetscInt       i;
1986 
1987           ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr);
1988           ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);
1989           for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */
1990           ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
1991         }
1992       }
1993     }
1994     break;
1995   case 3:  /* distributed assembled matrix input (size>1) */
1996     mumps->id.nnz_loc = mumps->nnz;
1997     mumps->id.irn_loc = mumps->irn;
1998     mumps->id.jcn_loc = mumps->jcn;
1999     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
2000     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2001       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2002       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2003       ierr = VecDestroy(&b);CHKERRQ(ierr);
2004     }
2005     break;
2006   }
2007   PetscMUMPS_c(mumps);
2008   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2009 
2010   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2011   F->ops->solve           = MatSolve_MUMPS;
2012   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2013   F->ops->matsolve        = MatMatSolve_MUMPS;
2014   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2015 
2016   mumps->matstruc = SAME_NONZERO_PATTERN;
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 /* Note the Petsc r and c permutations are ignored */
2021 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2022 {
2023   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2024   PetscErrorCode ierr;
2025   Vec            b;
2026   const PetscInt M = A->rmap->N;
2027 
2028   PetscFunctionBegin;
2029   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2030     /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */
2031     PetscFunctionReturn(0);
2032   }
2033 
2034   /* Set MUMPS options from the options database */
2035   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2036 
2037   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2038   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2039 
2040   /* analysis phase */
2041   /*----------------*/
2042   mumps->id.job = JOB_FACTSYMBOLIC;
2043   mumps->id.n   = M;
2044   switch (mumps->id.ICNTL(18)) {
2045   case 0:  /* centralized assembled matrix input */
2046     if (!mumps->myid) {
2047       mumps->id.nnz = mumps->nnz;
2048       mumps->id.irn = mumps->irn;
2049       mumps->id.jcn = mumps->jcn;
2050       if (mumps->id.ICNTL(6)>1) {
2051         mumps->id.a = (MumpsScalar*)mumps->val;
2052       }
2053     }
2054     break;
2055   case 3:  /* distributed assembled matrix input (size>1) */
2056     mumps->id.nnz_loc = mumps->nnz;
2057     mumps->id.irn_loc = mumps->irn;
2058     mumps->id.jcn_loc = mumps->jcn;
2059     if (mumps->id.ICNTL(6)>1) {
2060       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2061     }
2062     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2063       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2064       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2065       ierr = VecDestroy(&b);CHKERRQ(ierr);
2066     }
2067     break;
2068   }
2069   PetscMUMPS_c(mumps);
2070   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2071 
2072   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2073   F->ops->solve           = MatSolve_MUMPS;
2074   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2075 
2076   mumps->matstruc = SAME_NONZERO_PATTERN;
2077   PetscFunctionReturn(0);
2078 }
2079 
2080 /* Note the Petsc r permutation and factor info are ignored */
2081 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2082 {
2083   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2084   PetscErrorCode ierr;
2085   Vec            b;
2086   const PetscInt M = A->rmap->N;
2087 
2088   PetscFunctionBegin;
2089   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2090     /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */
2091     PetscFunctionReturn(0);
2092   }
2093 
2094   /* Set MUMPS options from the options database */
2095   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2096 
2097   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2098   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2099 
2100   /* analysis phase */
2101   /*----------------*/
2102   mumps->id.job = JOB_FACTSYMBOLIC;
2103   mumps->id.n   = M;
2104   switch (mumps->id.ICNTL(18)) {
2105   case 0:  /* centralized assembled matrix input */
2106     if (!mumps->myid) {
2107       mumps->id.nnz = mumps->nnz;
2108       mumps->id.irn = mumps->irn;
2109       mumps->id.jcn = mumps->jcn;
2110       if (mumps->id.ICNTL(6)>1) {
2111         mumps->id.a = (MumpsScalar*)mumps->val;
2112       }
2113     }
2114     break;
2115   case 3:  /* distributed assembled matrix input (size>1) */
2116     mumps->id.nnz_loc = mumps->nnz;
2117     mumps->id.irn_loc = mumps->irn;
2118     mumps->id.jcn_loc = mumps->jcn;
2119     if (mumps->id.ICNTL(6)>1) {
2120       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2121     }
2122     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2123       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2124       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2125       ierr = VecDestroy(&b);CHKERRQ(ierr);
2126     }
2127     break;
2128   }
2129   PetscMUMPS_c(mumps);
2130   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2131 
2132   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2133   F->ops->solve                 = MatSolve_MUMPS;
2134   F->ops->solvetranspose        = MatSolve_MUMPS;
2135   F->ops->matsolve              = MatMatSolve_MUMPS;
2136   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2137 #if defined(PETSC_USE_COMPLEX)
2138   F->ops->getinertia = NULL;
2139 #else
2140   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2141 #endif
2142 
2143   mumps->matstruc = SAME_NONZERO_PATTERN;
2144   PetscFunctionReturn(0);
2145 }
2146 
2147 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2148 {
2149   PetscErrorCode    ierr;
2150   PetscBool         iascii;
2151   PetscViewerFormat format;
2152   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2153 
2154   PetscFunctionBegin;
2155   /* check if matrix is mumps type */
2156   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2157 
2158   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
2159   if (iascii) {
2160     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
2161     if (format == PETSC_VIEWER_ASCII_INFO) {
2162       ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
2163       ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym);CHKERRQ(ierr);
2164       ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par);CHKERRQ(ierr);
2165       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr);
2166       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr);
2167       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr);
2168       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr);
2169       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr);
2170       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr);
2171       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering): %d\n",mumps->id.ICNTL(7));CHKERRQ(ierr);
2172       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr);
2173       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr);
2174       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr);
2175       if (mumps->id.ICNTL(11)>0) {
2176         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
2177         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
2178         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
2179         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
2180         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
2181         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
2182       }
2183       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr);
2184       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr);
2185       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr);
2186       /* ICNTL(15-17) not used */
2187       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr);
2188       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr);
2189       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr);
2190       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr);
2191       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr);
2192       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory that can be allocated locally): %d\n",mumps->id.ICNTL(23));CHKERRQ(ierr);
2193 
2194       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr);
2195       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr);
2196       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr);
2197       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr);
2198       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr);
2199       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr);
2200 
2201       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr);
2202       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors are discarded in the solve phase):   %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr);
2203       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr);
2204       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr);
2205       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr);
2206       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr);
2207 
2208       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
2209       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
2210       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
2211       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
2212       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
2213       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);
2214 
2215       /* information local to each processor */
2216       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
2217       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
2218       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
2219       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2220       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
2221       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
2222       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2223       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
2224       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
2225       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2226 
2227       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
2228       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
2229       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2230 
2231       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
2232       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
2233       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2234 
2235       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
2236       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
2237       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2238 
2239       if (mumps->ninfo && mumps->ninfo <= 80) {
2240         PetscInt i;
2241         for (i=0; i<mumps->ninfo; i++) {
2242           ierr = PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]);CHKERRQ(ierr);
2243           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
2244           ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2245         }
2246       }
2247       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
2248 
2249       if (!mumps->myid) { /* information from the host */
2250         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
2251         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
2252         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
2253         ierr = PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);
2254 
2255         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr);
2256         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr);
2257         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr);
2258         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr);
2259         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr);
2260         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr);
2261         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr);
2262         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr);
2263         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr);
2264         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr);
2265         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr);
2266         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compresses after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr);
2267         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr);
2268         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr);
2269         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr);
2270         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr);
2271         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr);
2272         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr);
2273         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr);
2274         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr);
2275         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr);
2276         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr);
2277         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr);
2278         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
2279         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
2280         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
2281         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
2282         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
2283         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
2284         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
2285         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr);
2286         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr);
2287         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr);
2288         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr);
2289       }
2290     }
2291   }
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2296 {
2297   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2298 
2299   PetscFunctionBegin;
2300   info->block_size        = 1.0;
2301   info->nz_allocated      = mumps->id.INFOG(20);
2302   info->nz_used           = mumps->id.INFOG(20);
2303   info->nz_unneeded       = 0.0;
2304   info->assemblies        = 0.0;
2305   info->mallocs           = 0.0;
2306   info->memory            = 0.0;
2307   info->fill_ratio_given  = 0;
2308   info->fill_ratio_needed = 0;
2309   info->factor_mallocs    = 0;
2310   PetscFunctionReturn(0);
2311 }
2312 
2313 /* -------------------------------------------------------------------------------------------*/
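/* Backend for the public API MatFactorSetSchurIS(). A minimal sequential usage sketch (nschur and idx are placeholder
   user data; the call is typically made after MatGetFactor() and before the symbolic factorization):
     ISCreateGeneral(PETSC_COMM_SELF,nschur,idx,PETSC_COPY_VALUES,&is);
     MatFactorSetSchurIS(F,is);
     ISDestroy(&is);
*/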
2314 PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
2315 {
2316   Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
2317   const PetscScalar *arr;
2318   const PetscInt    *idxs;
2319   PetscInt          size,i;
2320   PetscErrorCode    ierr;
2321 
2322   PetscFunctionBegin;
2323   ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
2324   if (mumps->petsc_size > 1) {
2325     PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */
2326 
2327     ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
2328     ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr);
2329     PetscCheckFalse(!gs,PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
2330   }
2331 
2332   /* Schur complement matrix */
2333   ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
2334   ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
2335   ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
2336   mumps->id.schur      = (MumpsScalar*)arr;
2337   mumps->id.size_schur = size;
2338   mumps->id.schur_lld  = size;
2339   ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
2340   if (mumps->sym == 1) {
2341     ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
2342   }
2343 
2344   /* MUMPS expects Fortran style indices */
2345   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
2346   ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
2347   ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
2348   for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
2349   ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
2350   if (mumps->petsc_size > 1) {
2351     mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
2352   } else {
2353     if (F->factortype == MAT_FACTOR_LU) {
2354       mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
2355     } else {
2356       mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
2357     }
2358   }
2359   /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
2360   mumps->id.ICNTL(26) = -1;
2361   PetscFunctionReturn(0);
2362 }
2363 
2364 /* -------------------------------------------------------------------------------------------*/
2365 PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
2366 {
2367   Mat            St;
2368   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2369   PetscScalar    *array;
2370 #if defined(PETSC_USE_COMPLEX)
2371   PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0);
2372 #endif
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   PetscCheckFalse(!mumps->id.ICNTL(19),PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
2377   ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
2378   ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
2379   ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
2380   ierr = MatSetUp(St);CHKERRQ(ierr);
2381   ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
2382   if (!mumps->sym) { /* MUMPS always returns a full matrix */
2383     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
2384       PetscInt i,j,N=mumps->id.size_schur;
2385       for (i=0;i<N;i++) {
2386         for (j=0;j<N;j++) {
2387 #if !defined(PETSC_USE_COMPLEX)
2388           PetscScalar val = mumps->id.schur[i*N+j];
2389 #else
2390           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2391 #endif
2392           array[j*N+i] = val;
2393         }
2394       }
2395     } else { /* stored by columns */
2396       ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
2397     }
2398   } else { /* either full or lower-triangular (not packed) */
2399     if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
2400       PetscInt i,j,N=mumps->id.size_schur;
2401       for (i=0;i<N;i++) {
2402         for (j=i;j<N;j++) {
2403 #if !defined(PETSC_USE_COMPLEX)
2404           PetscScalar val = mumps->id.schur[i*N+j];
2405 #else
2406           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2407 #endif
2408           array[i*N+j] = val;
2409           array[j*N+i] = val;
2410         }
2411       }
2412     } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
2413       ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
2414     } else { /* ICNTL(19) == 1 lower triangular stored by rows */
2415       PetscInt i,j,N=mumps->id.size_schur;
2416       for (i=0;i<N;i++) {
2417         for (j=0;j<i+1;j++) {
2418 #if !defined(PETSC_USE_COMPLEX)
2419           PetscScalar val = mumps->id.schur[i*N+j];
2420 #else
2421           PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
2422 #endif
2423           array[i*N+j] = val;
2424           array[j*N+i] = val;
2425         }
2426       }
2427     }
2428   }
2429   ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
2430   *S   = St;
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 /* -------------------------------------------------------------------------------------------*/
2435 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2436 {
2437   PetscErrorCode ierr;
2438   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2439 
2440   PetscFunctionBegin;
2441   ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2446 {
2447   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2448 
2449   PetscFunctionBegin;
2450   *ival = mumps->id.ICNTL(icntl);
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 /*@
2455   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2456 
2457    Logically Collective on Mat
2458 
2459    Input Parameters:
2460 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2461 .  icntl - index of MUMPS parameter array ICNTL()
2462 -  ival - value of MUMPS ICNTL(icntl)
2463 
2464   Options Database:
2465 .   -mat_mumps_icntl_<icntl> <ival> - change the option numbered icntl to ival
2466 
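   Example:
     A minimal usage sketch (it assumes F is a MUMPS factor obtained with MatGetFactor()); ICNTL(7)=5 selects the Metis sequential ordering listed under MATSOLVERMUMPS:
$      MatMumpsSetIcntl(F,7,5);
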
2467    Level: beginner
2468 
2469    References:
2470 .  * - MUMPS Users' Guide
2471 
2472 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2473 @*/
2474 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2475 {
2476   PetscErrorCode ierr;
2477 
2478   PetscFunctionBegin;
2479   PetscValidType(F,1);
2480   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2481   PetscValidLogicalCollectiveInt(F,icntl,2);
2482   PetscValidLogicalCollectiveInt(F,ival,3);
2483   ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
2484   PetscFunctionReturn(0);
2485 }
2486 
2487 /*@
2488   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2489 
2490    Logically Collective on Mat
2491 
2492    Input Parameters:
2493 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2494 -  icntl - index of MUMPS parameter array ICNTL()
2495 
2496   Output Parameter:
2497 .  ival - value of MUMPS ICNTL(icntl)
2498 
2499    Level: beginner
2500 
2501    References:
2502 .  * - MUMPS Users' Guide
2503 
2504 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2505 @*/
2506 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2507 {
2508   PetscErrorCode ierr;
2509 
2510   PetscFunctionBegin;
2511   PetscValidType(F,1);
2512   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2513   PetscValidLogicalCollectiveInt(F,icntl,2);
2514   PetscValidIntPointer(ival,3);
2515   ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 /* -------------------------------------------------------------------------------------------*/
2520 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2521 {
2522   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2523 
2524   PetscFunctionBegin;
2525   mumps->id.CNTL(icntl) = val;
2526   PetscFunctionReturn(0);
2527 }
2528 
2529 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2530 {
2531   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2532 
2533   PetscFunctionBegin;
2534   *val = mumps->id.CNTL(icntl);
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 /*@
2539   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2540 
2541    Logically Collective on Mat
2542 
2543    Input Parameters:
2544 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2545 .  icntl - index of MUMPS parameter array CNTL()
2546 -  val - value of MUMPS CNTL(icntl)
2547 
2548   Options Database:
2549 .   -mat_mumps_cntl_<icntl> <val>  - change the option numbered icntl to val
2550 
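   Example:
     A minimal usage sketch (it assumes F is a MUMPS factor obtained with MatGetFactor()); CNTL(1) is the relative pivoting threshold listed under MATSOLVERMUMPS:
$      MatMumpsSetCntl(F,1,0.01);
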
2551    Level: beginner
2552 
2553    References:
2554 .  * - MUMPS Users' Guide
2555 
2556 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2557 @*/
2558 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2559 {
2560   PetscErrorCode ierr;
2561 
2562   PetscFunctionBegin;
2563   PetscValidType(F,1);
2564   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2565   PetscValidLogicalCollectiveInt(F,icntl,2);
2566   PetscValidLogicalCollectiveReal(F,val,3);
2567   ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
2568   PetscFunctionReturn(0);
2569 }
2570 
2571 /*@
2572   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2573 
2574    Logically Collective on Mat
2575 
2576    Input Parameters:
2577 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2578 -  icntl - index of MUMPS parameter array CNTL()
2579 
2580   Output Parameter:
2581 .  val - value of MUMPS CNTL(icntl)
2582 
2583    Level: beginner
2584 
2585    References:
2586 .  * - MUMPS Users' Guide
2587 
2588 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2589 @*/
2590 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2591 {
2592   PetscErrorCode ierr;
2593 
2594   PetscFunctionBegin;
2595   PetscValidType(F,1);
2596   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2597   PetscValidLogicalCollectiveInt(F,icntl,2);
2598   PetscValidRealPointer(val,3);
2599   ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2604 {
2605   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2606 
2607   PetscFunctionBegin;
2608   *info = mumps->id.INFO(icntl);
2609   PetscFunctionReturn(0);
2610 }
2611 
2612 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2613 {
2614   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2615 
2616   PetscFunctionBegin;
2617   *infog = mumps->id.INFOG(icntl);
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2622 {
2623   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2624 
2625   PetscFunctionBegin;
2626   *rinfo = mumps->id.RINFO(icntl);
2627   PetscFunctionReturn(0);
2628 }
2629 
2630 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2631 {
2632   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2633 
2634   PetscFunctionBegin;
2635   *rinfog = mumps->id.RINFOG(icntl);
2636   PetscFunctionReturn(0);
2637 }
2638 
2639 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2640 {
2641   PetscErrorCode ierr;
2642   Mat            Bt = NULL,Btseq = NULL;
2643   PetscBool      flg;
2644   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2645   PetscScalar    *aa;
2646   PetscInt       spnr,*ia,*ja,M,nrhs;
2647 
2648   PetscFunctionBegin;
2649   PetscValidPointer(spRHS,2);
2650   ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
2651   if (flg) {
2652     ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
2653   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be of type MATTRANSPOSEMAT");
2654 
2655   ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);
2656 
2657   if (mumps->petsc_size > 1) {
2658     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2659     Btseq = b->A;
2660   } else {
2661     Btseq = Bt;
2662   }
2663 
2664   ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
2665   mumps->id.nrhs = nrhs;
2666   mumps->id.lrhs = M;
2667   mumps->id.rhs  = NULL;
2668 
2669   if (!mumps->myid) {
2670     ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
2671     ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2672     PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2673     ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
2674     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2675   } else {
2676     mumps->id.irhs_ptr    = NULL;
2677     mumps->id.irhs_sparse = NULL;
2678     mumps->id.nz_rhs      = 0;
2679     mumps->id.rhs_sparse  = NULL;
2680   }
2681   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2682   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2683 
2684   /* solve phase */
2685   /*-------------*/
2686   mumps->id.job = JOB_SOLVE;
2687   PetscMUMPS_c(mumps);
2688   if (mumps->id.INFOG(1) < 0)
2689     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
2690 
2691   if (!mumps->myid) {
2692     ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
2693     ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2694     PetscCheckFalse(!flg,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2695   }
2696   PetscFunctionReturn(0);
2697 }
2698 
2699 /*@
2700   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2701 
2702    Logically Collective on Mat
2703 
2704    Input Parameters:
2705 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2706 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2707 
2708   Output Parameter:
2709 . spRHS - requested entries of inverse of A
2710 
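   Example:
     A minimal sketch (it assumes F is a MUMPS factor of A and Bt is a sequential AIJ matrix, held on process 0, whose nonzero pattern selects the requested entries); the requested entries of inv(A) overwrite the values of Bt:
$      MatCreateTranspose(Bt,&spRHS);
$      MatMumpsGetInverse(F,spRHS);
$      MatDestroy(&spRHS);
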
2711    Level: beginner
2712 
2713    References:
2714 .  * - MUMPS Users' Guide
2715 
2716 .seealso: MatGetFactor(), MatCreateTranspose()
2717 @*/
2718 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2719 {
2720   PetscErrorCode ierr;
2721 
2722   PetscFunctionBegin;
2723   PetscValidType(F,1);
2724   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2725   ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
2726   PetscFunctionReturn(0);
2727 }
2728 
2729 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2730 {
2731   PetscErrorCode ierr;
2732   Mat            spRHS;
2733 
2734   PetscFunctionBegin;
2735   ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
2736   ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
2737   ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
2738   PetscFunctionReturn(0);
2739 }
2740 
2741 /*@
2742   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2743 
2744    Logically Collective on Mat
2745 
2746    Input Parameters:
2747 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2748 -  spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2749 
2750   Output Parameter:
2751 . spRHST - requested entries of inverse of A^T
2752 
2753    Level: beginner
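   Example:
     A minimal sketch (it assumes F is a MUMPS factor of A and spRHST is a sequential AIJ matrix whose nonzero pattern selects the requested entries of the inverse of A^T); unlike MatMumpsGetInverse(), the AIJ matrix is passed directly:
$      MatMumpsGetInverseTranspose(F,spRHST);
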
2754 
2755    References:
2756 .  * - MUMPS Users' Guide
2757 
2758 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2759 @*/
2760 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2761 {
2762   PetscErrorCode ierr;
2763   PetscBool      flg;
2764 
2765   PetscFunctionBegin;
2766   PetscValidType(F,1);
2767   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2768   ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
2769   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be a MATAIJ matrix");
2770 
2771   ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
2772   PetscFunctionReturn(0);
2773 }
2774 
2775 /*@
2776   MatMumpsGetInfo - Get MUMPS parameter INFO()
2777 
2778    Logically Collective on Mat
2779 
2780    Input Parameters:
2781 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2782 -  icntl - index of MUMPS parameter array INFO()
2783 
2784   Output Parameter:
2785 .  ival - value of MUMPS INFO(icntl)
2786 
2787    Level: beginner
2788 
2789    References:
2790 .  * - MUMPS Users' Guide
2791 
2792 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2793 @*/
2794 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2795 {
2796   PetscErrorCode ierr;
2797 
2798   PetscFunctionBegin;
2799   PetscValidType(F,1);
2800   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2801   PetscValidIntPointer(ival,3);
2802   ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2803   PetscFunctionReturn(0);
2804 }
2805 
2806 /*@
2807   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2808 
2809    Logically Collective on Mat
2810 
2811    Input Parameters:
2812 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2813 -  icntl - index of MUMPS parameter array INFOG()
2814 
2815   Output Parameter:
2816 .  ival - value of MUMPS INFOG(icntl)
2817 
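   Example:
     A minimal sketch of checking for a MUMPS failure after factorization (it assumes F is a MUMPS factor obtained with MatGetFactor()); a negative INFOG(1) indicates an error reported by MUMPS:
$      PetscInt ival;
$      MatMumpsGetInfog(F,1,&ival);
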
2818    Level: beginner
2819 
2820    References:
2821 .  * - MUMPS Users' Guide
2822 
2823 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2824 @*/
2825 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2826 {
2827   PetscErrorCode ierr;
2828 
2829   PetscFunctionBegin;
2830   PetscValidType(F,1);
2831   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2832   PetscValidIntPointer(ival,3);
2833   ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2834   PetscFunctionReturn(0);
2835 }
2836 
2837 /*@
2838   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2839 
2840    Logically Collective on Mat
2841 
2842    Input Parameters:
2843 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2844 -  icntl - index of MUMPS parameter array RINFO()
2845 
2846   Output Parameter:
2847 .  val - value of MUMPS RINFO(icntl)
2848 
2849    Level: beginner
2850 
2851    References:
2852 .  * - MUMPS Users' Guide
2853 
2854 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2855 @*/
2856 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2857 {
2858   PetscErrorCode ierr;
2859 
2860   PetscFunctionBegin;
2861   PetscValidType(F,1);
2862   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2863   PetscValidRealPointer(val,3);
2864   ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2865   PetscFunctionReturn(0);
2866 }
2867 
2868 /*@
2869   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2870 
2871    Logically Collective on Mat
2872 
2873    Input Parameters:
2874 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2875 -  icntl - index of MUMPS parameter array RINFOG()
2876 
2877   Output Parameter:
2878 .  val - value of MUMPS RINFOG(icntl)
2879 
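   Example:
     A minimal sketch of retrieving the determinant mantissa after factoring with -mat_mumps_icntl_33 1 (it assumes F is a MUMPS factor obtained with MatGetFactor()); combine with INFOG(34) from MatMumpsGetInfog() as (RINFOG(12),RINFOG(13))*2^INFOG(34):
$      PetscReal rval;
$      MatMumpsGetRinfog(F,12,&rval);
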
2880    Level: beginner
2881 
2882    References:
2883 .  * - MUMPS Users' Guide
2884 
2885 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2886 @*/
2887 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2888 {
2889   PetscErrorCode ierr;
2890 
2891   PetscFunctionBegin;
2892   PetscValidType(F,1);
2893   PetscCheckFalse(!F->factortype,PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2894   PetscValidRealPointer(val,3);
2895   ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2896   PetscFunctionReturn(0);
2897 }
2898 
2899 /*MC
2900   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2901   distributed and sequential matrices via the external package MUMPS.
2902 
2903   Works with MATAIJ and MATSBAIJ matrices
2904 
2905   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2906 
2907   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2908 
2909   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
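
  For example, a typical command line might look like the following (a sketch; ./ex is a placeholder for your application executable and the ICNTL(7) setting is optional):
$     mpiexec -n 4 ./ex -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mumps -mat_mumps_icntl_7 5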
2910 
2911   Options Database Keys:
2912 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2913 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings
2914 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2915 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2916 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)
2917 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2918                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2919 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2920 .  -mat_mumps_icntl_10  - ICNTL(10): maximum number of iterative refinements
2921 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2922 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2923 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2924 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2925 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2926 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2927 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2928 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2929 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2930 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2931 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
2932 .  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2933 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2934 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2935 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2936 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2937 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2938 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2939 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2940 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2941 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2942 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2943 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2944 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2945 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2946 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2947                                    If m is not given, the default is the number of cores per CPU package (socket) as reported by hwloc, as suggested by the MUMPS manual.
2948 
2949   Level: beginner
2950 
2951     Notes:
2952     MUMPS Cholesky does not handle (complex) Hermitian matrices (see http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf), so using it with a Hermitian matrix results in an error.
2953 
2954     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2955 $          KSPGetPC(ksp,&pc);
2956 $          PCFactorGetMatrix(pc,&mat);
2957 $          MatMumpsGetInfo(mat,....);
2958 $          MatMumpsGetInfog(mat,....); etc.
2959            Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2960 
2961   Using MUMPS with 64-bit integers
2962     MUMPS provides 64-bit integer support in two build modes:
2963       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2964       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2965 
2966       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2967       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2968       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2969       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2970 
2971     With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2972 
2973   Two modes to run MUMPS/PETSc with OpenMP
2974 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2975 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2976 
2977 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2978 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2979 
2980    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2981    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2982    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2983    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
2984    (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2985 
2986    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2987    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2988    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2989    are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (these may have been set
2990    by CPU binding policies in job scripts) and make them available to the master so that the OpenMP threads spawned by MUMPS can run on them.
2991    On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets.
2992    If you interleave MPI ranks across the two sockets (even ranks on socket 0, odd ranks on socket 1) and bind MPI ranks to cores, then with
2993    -mat_mumps_use_omp_threads 16 a master rank (and the threads it spawns) will use half of its cores on socket 0 and half on socket 1,
2994    which hurts locality. On the other hand, if you map MPI ranks consecutively onto the two sockets, the problem does not occur.
2995    Therefore, when you use -mat_mumps_use_omp_threads, keep an eye on your MPI rank mapping and CPU binding.
2996    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2997    examine the mapping result.
2998 
2999    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
3000    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
3001    calls omp_set_num_threads(m) internally before calling MUMPS.
3002 
3003    References:
3004 +  * - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3005 -  * - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3006 
3007 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()
3008 
3009 M*/
3010 
3011 static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
3012 {
3013   PetscFunctionBegin;
3014   *type = MATSOLVERMUMPS;
3015   PetscFunctionReturn(0);
3016 }
3017 
3018 /* MatGetFactor for Seq and MPI AIJ matrices */
3019 static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
3020 {
3021   Mat            B;
3022   PetscErrorCode ierr;
3023   Mat_MUMPS      *mumps;
3024   PetscBool      isSeqAIJ;
3025   PetscMPIInt    size;
3026 
3027   PetscFunctionBegin;
3028  #if defined(PETSC_USE_COMPLEX)
3029   PetscCheckFalse(A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3030  #endif
3031   /* Create the factorization matrix */
3032   ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
3033   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3034   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3035   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3036   ierr = MatSetUp(B);CHKERRQ(ierr);
3037 
3038   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3039 
3040   B->ops->view    = MatView_MUMPS;
3041   B->ops->getinfo = MatGetInfo_MUMPS;
3042 
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3056 
3057   if (ftype == MAT_FACTOR_LU) {
3058     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3059     B->factortype            = MAT_FACTOR_LU;
3060     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
3061     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
3062     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3063     mumps->sym = 0;
3064   } else {
3065     B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3066     B->factortype                  = MAT_FACTOR_CHOLESKY;
3067     if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
3068     else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
3069     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
3070 #if defined(PETSC_USE_COMPLEX)
3071     mumps->sym = 2;
3072 #else
3073     if (A->spd_set && A->spd) mumps->sym = 1;
3074     else                      mumps->sym = 2;
3075 #endif
3076   }
3077 
3078   /* set solvertype */
3079   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3080   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3081   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3082   if (size == 1) {
3083     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3084     B->canuseordering = PETSC_TRUE;
3085   }
3086   B->ops->destroy = MatDestroy_MUMPS;
3087   B->data         = (void*)mumps;
3088 
3089   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3090 
3091   *F = B;
3092   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3093   PetscFunctionReturn(0);
3094 }
3095 
3096 /* MatGetFactor for Seq and MPI SBAIJ matrices */
3097 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
3098 {
3099   Mat            B;
3100   PetscErrorCode ierr;
3101   Mat_MUMPS      *mumps;
3102   PetscBool      isSeqSBAIJ;
3103   PetscMPIInt    size;
3104 
3105   PetscFunctionBegin;
3106  #if defined(PETSC_USE_COMPLEX)
3107   PetscCheckFalse(A->hermitian && !A->symmetric,PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
3108  #endif
3109   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3110   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3111   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3112   ierr = MatSetUp(B);CHKERRQ(ierr);
3113 
3114   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3115   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
3116   if (isSeqSBAIJ) {
3117     mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
3118   } else {
3119     mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
3120   }
3121 
3122   B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
3123   B->ops->view                   = MatView_MUMPS;
3124   B->ops->getinfo                = MatGetInfo_MUMPS;
3125 
3126   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3127   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3128   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3129   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3130   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3131   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3132   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3133   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3134   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3135   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3136   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3137   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3138   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3139 
3140   B->factortype = MAT_FACTOR_CHOLESKY;
3141 #if defined(PETSC_USE_COMPLEX)
3142   mumps->sym = 2;
3143 #else
3144   if (A->spd_set && A->spd) mumps->sym = 1;
3145   else                      mumps->sym = 2;
3146 #endif
3147 
3148   /* set solvertype */
3149   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3150   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3151   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3152   if (size == 1) {
3153     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3154     B->canuseordering = PETSC_TRUE;
3155   }
3156   ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
3157   B->ops->destroy = MatDestroy_MUMPS;
3158   B->data         = (void*)mumps;
3159 
3160   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3161 
3162   *F = B;
3163   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3164   PetscFunctionReturn(0);
3165 }
3166 
3167 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3168 {
3169   Mat            B;
3170   PetscErrorCode ierr;
3171   Mat_MUMPS      *mumps;
3172   PetscBool      isSeqBAIJ;
3173   PetscMPIInt    size;
3174 
3175   PetscFunctionBegin;
3176   /* Create the factorization matrix */
3177   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
3178   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3179   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3180   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3181   ierr = MatSetUp(B);CHKERRQ(ierr);
3182 
3183   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3184   if (ftype == MAT_FACTOR_LU) {
3185     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3186     B->factortype            = MAT_FACTOR_LU;
3187     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3188     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3189     mumps->sym = 0;
3190     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3191   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3192 
3193   B->ops->view        = MatView_MUMPS;
3194   B->ops->getinfo     = MatGetInfo_MUMPS;
3195 
3196   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3197   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3198   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3199   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3200   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3201   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3202   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3203   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3204   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3205   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3206   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3207   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3208   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3209 
3210   /* set solvertype */
3211   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3212   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3213   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3214   if (size == 1) {
3215     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3216     B->canuseordering = PETSC_TRUE;
3217   }
3218   B->ops->destroy = MatDestroy_MUMPS;
3219   B->data         = (void*)mumps;
3220 
3221   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3222 
3223   *F = B;
3224   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3225   PetscFunctionReturn(0);
3226 }
3227 
3228 /* MatGetFactor for Seq and MPI SELL matrices */
3229 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3230 {
3231   Mat            B;
3232   PetscErrorCode ierr;
3233   Mat_MUMPS      *mumps;
3234   PetscBool      isSeqSELL;
3235   PetscMPIInt    size;
3236 
3237   PetscFunctionBegin;
3238   /* Create the factorization matrix */
3239   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
3240   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3241   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3242   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3243   ierr = MatSetUp(B);CHKERRQ(ierr);
3244 
3245   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3246 
3247   B->ops->view        = MatView_MUMPS;
3248   B->ops->getinfo     = MatGetInfo_MUMPS;
3249 
3250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3251   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3252   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3253   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3254   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3255   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3256   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3257   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3258   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3259   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3260   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3261 
3262   if (ftype == MAT_FACTOR_LU) {
3263     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3264     B->factortype            = MAT_FACTOR_LU;
3265     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3266     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3267     mumps->sym = 0;
3268     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3269   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3270 
3271   /* set solvertype */
3272   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3273   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3274   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3275   if (size == 1) {
3276     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3277     B->canuseordering = PETSC_TRUE;
3278   }
3279   B->ops->destroy = MatDestroy_MUMPS;
3280   B->data         = (void*)mumps;
3281 
3282   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3283 
3284   *F = B;
3285   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3286   PetscFunctionReturn(0);
3287 }
3288 
3289 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3290 {
3291   PetscErrorCode ierr;
3292 
3293   PetscFunctionBegin;
3294   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3295   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3296   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3297   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3298   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3299   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3300   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3301   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3302   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3303   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3304   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
3305   PetscFunctionReturn(0);
3306 }
3307 
3308