xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision a6f03b2cfb01b79a0375d4b4e78f75e18dd6b346)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
54     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
58     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #endif
61 
/* MUMPS_INT is a 32-bit int here (full 64-bit MUMPS builds are rejected above) */
#define MPIU_MUMPSINT             MPI_INT
#define PETSC_MUMPS_INT_MAX       2147483647
/* Written as (-2147483647 - 1): the literal 2147483648 does not fit in an int, so
   the former -2147483648 was unary minus applied to a long constant, giving the
   macro type long and silently widening comparisons/arguments that use it */
#define PETSC_MUMPS_INT_MIN       (-2147483647 - 1)
65 
/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  /* The range check is compiled in only with 64-bit indices (otherwise PetscInt already fits),
     and PetscUnlikelyDebug() makes it active only in debug builds */
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}
74 
75 /* Put these utility routines here since they are only used in this file */
76 PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
77 {
78   PetscErrorCode ierr;
79   PetscInt       myval;
80   PetscBool      myset;
81   PetscFunctionBegin;
82   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83   ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
84   if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
85   if (set) *set = myset;
86   PetscFunctionReturn(0);
87 }
88 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
89 
/* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
/* NOTE(review): both variants expand in-place and rely on a local PetscErrorCode ierr
   being in scope at the call site (the OpenMP branch assigns to it). */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
#define PetscMUMPS_c(mumps) \
  do { \
    if (mumps->use_petsc_omp_support) { \
      if (mumps->is_omp_master) { \
        ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
        MUMPS_c(&mumps->id); \
        ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
      } \
      ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
      /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
         to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
         an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
         omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
      */ \
      ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRMPI(ierr);\
      ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
    } else { \
      MUMPS_c(&mumps->id); \
    } \
  } while (0)
#else
/* Without PETSc OpenMP support every rank calls MUMPS directly */
#define PetscMUMPS_c(mumps) \
  do { MUMPS_c(&mumps->id); } while (0)
#endif
117 
/* declare MumpsScalar: the scalar type MUMPS expects, matched to PETSc's precision/complex configuration.
   For complex builds MUMPS uses its own struct types; for real builds PetscScalar can be used directly. */
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
#define MumpsScalar mumps_complex
#else
#define MumpsScalar mumps_double_complex
#endif
#else
#define MumpsScalar PetscScalar
#endif

/* macros s.t. indices match MUMPS documentation (MUMPS arrays are documented 1-based) */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
136 
typedef struct Mat_MUMPS Mat_MUMPS;
/* Private data of a MUMPS factored matrix: the MUMPS handle (id) plus the COO triples,
   RHS/solution scratch, Schur-complement state and optional OpenMP-support bookkeeping. */
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;              /* whether the nonzero pattern changed since last factorization */
  PetscMPIInt    myid,petsc_size;       /* rank and size in the PETSc matrix communicator */
  PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;                   /* MUMPS SYM parameter: matrix symmetry type */
  MPI_Comm       mumps_comm;            /* communicator handed to MUMPS */
  PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc; /* distributed-RHS leading dim, local row count and row indices */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;    /* per-rank RHS row counts and max number of RHS handled at once */
  PetscMPIInt    *rhs_recvcounts,*rhs_disps; /* gather counts/displacements for collecting RHS on omp master */
  PetscScalar    *rhs_loc,*rhs_recvbuf; /* local RHS buffer and receive buffer on omp master */
#endif
  Vec            b_seq,x_seq;           /* sequential work vectors for centralized RHS/solution */
  PetscInt       ninfo,*info;           /* which INFO to display */
  PetscInt       sizeredrhs;            /* allocated size of id.redrhs (reduced RHS for Schur) */
  PetscScalar    *schur_sol;            /* scratch solution used when the Schur complement is stored inverted */
  PetscInt       schur_sizesol;         /* allocated size of schur_sol */
  PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*); /* format-specific matrix -> COO converter */

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;              /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;     /* MPI tag for point-to-point traffic and size of omp_comm */
  PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
  MPI_Request    *reqs;                 /* outstanding nonblocking requests used with the above */
};
187 
/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
   With 32-bit PetscInt the input arrays are returned as-is; with 64-bit PetscInt they are
   copied (with range checks) into grow-only work arrays cached on the Mat_MUMPS object.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    /* narrow each entry with overflow checking (PetscMUMPSIntCast) */
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++)    {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  /* 32-bit PetscInt is layout-compatible with PetscMUMPSInt; no copy needed */
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224 {
225   PetscErrorCode ierr;
226 
227   PetscFunctionBegin;
228   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
229   ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
230   ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
231   mumps->id.size_schur = 0;
232   mumps->id.schur_lld  = 0;
233   mumps->id.ICNTL(19)  = 0;
234   PetscFunctionReturn(0);
235 }
236 
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps the reduced RHS already stored in id.redrhs (no copy) */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X aliases the same redrhs storage, so the triangular solve writes the answer in place */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* here the solve is a multiplication by inv(S); B and X cannot alias, so use
       a separately allocated (and cached) scratch buffer schur_sol for X */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    /* copy the product back into redrhs (the contract is result-in-redrhs) */
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
299 
/* Drive the two-phase MUMPS Schur protocol around a solve: with expansion=PETSC_FALSE
   prepare the condensation phase (ICNTL(26)=1); with expansion=PETSC_TRUE solve the
   Schur system ourselves and run the MUMPS expansion phase (ICNTL(26)=2).
   No-op when no Schur complement was requested (ICNTL(19)==0). */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
336 
337 /*
338   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
339 
340   input:
341     A       - matrix in aij,baij or sbaij format
342     shift   - 0: C style output triple; 1: Fortran style output triple.
343     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
344               MAT_REUSE_MATRIX:   only the values in v array are updated
345   output:
346     nnz     - dim of r, c, and v (number of local nonzero entries of A)
347     r, c, v - row and col index, matrix values (matrix triples)
348 
349   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
350   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
351   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
352 
353  */
354 
355 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
356 {
357   const PetscScalar *av;
358   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
359   PetscInt64        nz,rnz,i,j,k;
360   PetscErrorCode    ierr;
361   PetscMUMPSInt     *row,*col;
362   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
363 
364   PetscFunctionBegin;
365   ierr       = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
366   mumps->val = (PetscScalar*)av;
367   if (reuse == MAT_INITIAL_MATRIX) {
368     nz   = aa->nz;
369     ai   = aa->i;
370     aj   = aa->j;
371     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
372     for (i=k=0; i<M; i++) {
373       rnz = ai[i+1] - ai[i];
374       ajj = aj + ai[i];
375       for (j=0; j<rnz; j++) {
376         ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
377         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
378         k++;
379       }
380     }
381     mumps->irn = row;
382     mumps->jcn = col;
383     mumps->nnz = nz;
384   }
385   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
/* Convert a SeqSELL (sliced ELLPACK) matrix to COO triples for MUMPS.
   NOTE(review): the 8*i and (r+1)&0x07 arithmetic hard-codes a slice height of 8 —
   confirm this matches the SELL implementation's slice size before changing either. */
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  /* SELL values are used in place; only index arrays are built */
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices];
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    /* within slice i, entries cycle through the slice's rows: r counts 0..7 repeatedly */
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    /* column indices are stored explicitly per entry (padding included) */
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
413 
/* Convert a SeqBAIJ matrix to COO triples, expanding each bs x bs block into bs2 scalar
   entries. BAIJ stores blocks column-major, so for block (i,ajj[k]) the inner loops emit
   entries in the same order the values sit in aa->a (m is the row within the block). */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  ierr       = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  /* NOTE(review): uses global row count N; for a sequential matrix N == n, so this
     equals the local block-row count — confirm if this routine is ever reused elsewhere */
  M          = A->rmap->N/bs;
  mumps->val = aa->a;            /* block values are used in place */
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;           /* each block contributes bs2 scalar entries */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {       /* column within the block */
          for (m=0; m<bs; m++) {     /* row within the block */
            ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
450 
451 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
452 {
453   const PetscInt *ai, *aj,*ajj;
454   PetscInt        bs;
455   PetscInt64      nz,rnz,i,j,k,m;
456   PetscErrorCode  ierr;
457   PetscMUMPSInt   *row,*col;
458   PetscScalar     *val;
459   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
460   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
461 #if defined(PETSC_USE_COMPLEX)
462   PetscBool       hermitian;
463 #endif
464 
465   PetscFunctionBegin;
466 #if defined(PETSC_USE_COMPLEX)
467   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
468   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
469 #endif
470   ai   = aa->i;
471   aj   = aa->j;
472   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
473   if (reuse == MAT_INITIAL_MATRIX) {
474     nz   = aa->nz;
475     ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
476     if (bs>1) {
477       ierr       = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
478       mumps->val = mumps->val_alloc;
479     } else {
480       mumps->val = aa->a;
481     }
482     mumps->irn = row;
483     mumps->jcn = col;
484   } else {
485     if (bs == 1) mumps->val = aa->a;
486     row = mumps->irn;
487     col = mumps->jcn;
488   }
489   val = mumps->val;
490 
491   nz = 0;
492   if (bs>1) {
493     for (i=0; i<mbs; i++) {
494       rnz = ai[i+1] - ai[i];
495       ajj = aj + ai[i];
496       for (j=0; j<rnz; j++) {
497         for (k=0; k<bs; k++) {
498           for (m=0; m<bs; m++) {
499             if (ajj[j]>i || k>=m) {
500               if (reuse == MAT_INITIAL_MATRIX) {
501                 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
502                 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
503               }
504               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
505             }
506           }
507         }
508       }
509     }
510   } else if (reuse == MAT_INITIAL_MATRIX) {
511     for (i=0; i<mbs; i++) {
512       rnz = ai[i+1] - ai[i];
513       ajj = aj + ai[i];
514       for (j=0; j<rnz; j++) {
515         ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
516         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
517         nz++;
518       }
519     }
520     if (PetscUnlikely(nz != aa->nz)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
521   }
522   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
527 {
528   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
529   PetscInt64        nz,rnz,i,j;
530   const PetscScalar *av,*v1;
531   PetscScalar       *val;
532   PetscErrorCode    ierr;
533   PetscMUMPSInt     *row,*col;
534   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
535   PetscBool         missing;
536 #if defined(PETSC_USE_COMPLEX)
537   PetscBool         hermitian;
538 #endif
539 
540   PetscFunctionBegin;
541 #if defined(PETSC_USE_COMPLEX)
542   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
543   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
544 #endif
545   ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
546   ai    = aa->i; aj = aa->j;
547   adiag = aa->diag;
548   ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
549   if (reuse == MAT_INITIAL_MATRIX) {
550     /* count nz in the upper triangular part of A */
551     nz = 0;
552     if (missing) {
553       for (i=0; i<M; i++) {
554         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
555           for (j=ai[i];j<ai[i+1];j++) {
556             if (aj[j] < i) continue;
557             nz++;
558           }
559         } else {
560           nz += ai[i+1] - adiag[i];
561         }
562       }
563     } else {
564       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
565     }
566     ierr       = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
567     ierr       = PetscMalloc1(nz,&val);CHKERRQ(ierr);
568     mumps->nnz = nz;
569     mumps->irn = row;
570     mumps->jcn = col;
571     mumps->val = mumps->val_alloc = val;
572 
573     nz = 0;
574     if (missing) {
575       for (i=0; i<M; i++) {
576         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
577           for (j=ai[i];j<ai[i+1];j++) {
578             if (aj[j] < i) continue;
579             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
580             ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
581             val[nz] = av[j];
582             nz++;
583           }
584         } else {
585           rnz = ai[i+1] - adiag[i];
586           ajj = aj + adiag[i];
587           v1  = av + adiag[i];
588           for (j=0; j<rnz; j++) {
589             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
590             ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
591             val[nz++] = v1[j];
592           }
593         }
594       }
595     } else {
596       for (i=0; i<M; i++) {
597         rnz = ai[i+1] - adiag[i];
598         ajj = aj + adiag[i];
599         v1  = av + adiag[i];
600         for (j=0; j<rnz; j++) {
601           ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
602           ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
603           val[nz++] = v1[j];
604         }
605       }
606     }
607   } else {
608     nz = 0;
609     val = mumps->val;
610     if (missing) {
611       for (i=0; i <M; i++) {
612         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
613           for (j=ai[i];j<ai[i+1];j++) {
614             if (aj[j] < i) continue;
615             val[nz++] = av[j];
616           }
617         } else {
618           rnz = ai[i+1] - adiag[i];
619           v1  = av + adiag[i];
620           for (j=0; j<rnz; j++) {
621             val[nz++] = v1[j];
622           }
623         }
624       }
625     } else {
626       for (i=0; i <M; i++) {
627         rnz = ai[i+1] - adiag[i];
628         v1  = av + adiag[i];
629         for (j=0; j<rnz; j++) {
630           val[nz++] = v1[j];
631         }
632       }
633     }
634   }
635   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
638 
639 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
640 {
641   PetscErrorCode    ierr;
642   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
643   PetscInt          bs;
644   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
645   PetscMUMPSInt     *row,*col;
646   const PetscScalar *av,*bv,*v1,*v2;
647   PetscScalar       *val;
648   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
649   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
650   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
651   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
652 #if defined(PETSC_USE_COMPLEX)
653   PetscBool         hermitian;
654 #endif
655 
656   PetscFunctionBegin;
657 #if defined(PETSC_USE_COMPLEX)
658   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
659   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
660 #endif
661   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
662   rstart = A->rmap->rstart;
663   ai = aa->i;
664   aj = aa->j;
665   bi = bb->i;
666   bj = bb->j;
667   av = aa->a;
668   bv = bb->a;
669 
670   garray = mat->garray;
671 
672   if (reuse == MAT_INITIAL_MATRIX) {
673     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
674     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
675     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
676     /* can not decide the exact mumps->nnz now because of the SBAIJ */
677     mumps->irn = row;
678     mumps->jcn = col;
679     mumps->val = mumps->val_alloc = val;
680   } else {
681     val = mumps->val;
682   }
683 
684   jj = 0; irow = rstart;
685   for (i=0; i<mbs; i++) {
686     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
687     countA = ai[i+1] - ai[i];
688     countB = bi[i+1] - bi[i];
689     bjj    = bj + bi[i];
690     v1     = av + ai[i]*bs2;
691     v2     = bv + bi[i]*bs2;
692 
693     if (bs>1) {
694       /* A-part */
695       for (j=0; j<countA; j++) {
696         for (k=0; k<bs; k++) {
697           for (m=0; m<bs; m++) {
698             if (rstart + ajj[j]*bs>irow || k>=m) {
699               if (reuse == MAT_INITIAL_MATRIX) {
700                 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
701                 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
702               }
703               val[jj++] = v1[j*bs2 + m + k*bs];
704             }
705           }
706         }
707       }
708 
709       /* B-part */
710       for (j=0; j < countB; j++) {
711         for (k=0; k<bs; k++) {
712           for (m=0; m<bs; m++) {
713             if (reuse == MAT_INITIAL_MATRIX) {
714               ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
715               ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
716             }
717             val[jj++] = v2[j*bs2 + m + k*bs];
718           }
719         }
720       }
721     } else {
722       /* A-part */
723       for (j=0; j<countA; j++) {
724         if (reuse == MAT_INITIAL_MATRIX) {
725           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
726           ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
727         }
728         val[jj++] = v1[j];
729       }
730 
731       /* B-part */
732       for (j=0; j < countB; j++) {
733         if (reuse == MAT_INITIAL_MATRIX) {
734           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
735           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
736         }
737         val[jj++] = v2[j];
738       }
739     }
740     irow+=bs;
741   }
742   mumps->nnz = jj;
743   PetscFunctionReturn(0);
744 }
745 
746 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
747 {
748   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
749   PetscErrorCode    ierr;
750   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
751   PetscMUMPSInt     *row,*col;
752   const PetscScalar *av, *bv,*v1,*v2;
753   PetscScalar       *val;
754   Mat               Ad,Ao;
755   Mat_SeqAIJ        *aa;
756   Mat_SeqAIJ        *bb;
757 
758   PetscFunctionBegin;
759   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
760   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
761   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
762 
763   aa = (Mat_SeqAIJ*)(Ad)->data;
764   bb = (Mat_SeqAIJ*)(Ao)->data;
765   ai = aa->i;
766   aj = aa->j;
767   bi = bb->i;
768   bj = bb->j;
769 
770   rstart = A->rmap->rstart;
771 
772   if (reuse == MAT_INITIAL_MATRIX) {
773     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
774     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
775     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
776     mumps->nnz = nz;
777     mumps->irn = row;
778     mumps->jcn = col;
779     mumps->val = mumps->val_alloc = val;
780   } else {
781     val = mumps->val;
782   }
783 
784   jj = 0; irow = rstart;
785   for (i=0; i<m; i++) {
786     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
787     countA = ai[i+1] - ai[i];
788     countB = bi[i+1] - bi[i];
789     bjj    = bj + bi[i];
790     v1     = av + ai[i];
791     v2     = bv + bi[i];
792 
793     /* A-part */
794     for (j=0; j<countA; j++) {
795       if (reuse == MAT_INITIAL_MATRIX) {
796         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
797         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
798       }
799       val[jj++] = v1[j];
800     }
801 
802     /* B-part */
803     for (j=0; j < countB; j++) {
804       if (reuse == MAT_INITIAL_MATRIX) {
805         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
806         ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
807       }
808       val[jj++] = v2[j];
809     }
810     irow++;
811   }
812   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
813   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
818 {
819   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
820   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
821   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
822   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
823   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
824   const PetscInt    bs2=mat->bs2;
825   PetscErrorCode    ierr;
826   PetscInt          bs;
827   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
828   PetscMUMPSInt     *row,*col;
829   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
830   PetscScalar       *val;
831 
832   PetscFunctionBegin;
833   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
834   if (reuse == MAT_INITIAL_MATRIX) {
835     nz   = bs2*(aa->nz + bb->nz);
836     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
837     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
838     mumps->nnz = nz;
839     mumps->irn = row;
840     mumps->jcn = col;
841     mumps->val = mumps->val_alloc = val;
842   } else {
843     val = mumps->val;
844   }
845 
846   jj = 0; irow = rstart;
847   for (i=0; i<mbs; i++) {
848     countA = ai[i+1] - ai[i];
849     countB = bi[i+1] - bi[i];
850     ajj    = aj + ai[i];
851     bjj    = bj + bi[i];
852     v1     = av + bs2*ai[i];
853     v2     = bv + bs2*bi[i];
854 
855     idx = 0;
856     /* A-part */
857     for (k=0; k<countA; k++) {
858       for (j=0; j<bs; j++) {
859         for (n=0; n<bs; n++) {
860           if (reuse == MAT_INITIAL_MATRIX) {
861             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
862             ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
863           }
864           val[jj++] = v1[idx++];
865         }
866       }
867     }
868 
869     idx = 0;
870     /* B-part */
871     for (k=0; k<countB; k++) {
872       for (j=0; j<bs; j++) {
873         for (n=0; n<bs; n++) {
874           if (reuse == MAT_INITIAL_MATRIX) {
875             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
876             ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
877           }
878           val[jj++] = v2[idx++];
879         }
880       }
881     }
882     irow += bs;
883   }
884   PetscFunctionReturn(0);
885 }
886 
887 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
888 {
889   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
890   PetscErrorCode    ierr;
891   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
892   PetscMUMPSInt     *row,*col;
893   const PetscScalar *av, *bv,*v1,*v2;
894   PetscScalar       *val;
895   Mat               Ad,Ao;
896   Mat_SeqAIJ        *aa;
897   Mat_SeqAIJ        *bb;
898 #if defined(PETSC_USE_COMPLEX)
899   PetscBool         hermitian;
900 #endif
901 
902   PetscFunctionBegin;
903 #if defined(PETSC_USE_COMPLEX)
904   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
905   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
906 #endif
907   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
908   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
909   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
910 
911   aa    = (Mat_SeqAIJ*)(Ad)->data;
912   bb    = (Mat_SeqAIJ*)(Ao)->data;
913   ai    = aa->i;
914   aj    = aa->j;
915   adiag = aa->diag;
916   bi    = bb->i;
917   bj    = bb->j;
918 
919   rstart = A->rmap->rstart;
920 
921   if (reuse == MAT_INITIAL_MATRIX) {
922     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
923     nzb = 0;    /* num of upper triangular entries in mat->B */
924     for (i=0; i<m; i++) {
925       nza   += (ai[i+1] - adiag[i]);
926       countB = bi[i+1] - bi[i];
927       bjj    = bj + bi[i];
928       for (j=0; j<countB; j++) {
929         if (garray[bjj[j]] > rstart) nzb++;
930       }
931     }
932 
933     nz   = nza + nzb; /* total nz of upper triangular part of mat */
934     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
935     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
936     mumps->nnz = nz;
937     mumps->irn = row;
938     mumps->jcn = col;
939     mumps->val = mumps->val_alloc = val;
940   } else {
941     val = mumps->val;
942   }
943 
944   jj = 0; irow = rstart;
945   for (i=0; i<m; i++) {
946     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
947     v1     = av + adiag[i];
948     countA = ai[i+1] - adiag[i];
949     countB = bi[i+1] - bi[i];
950     bjj    = bj + bi[i];
951     v2     = bv + bi[i];
952 
953     /* A-part */
954     for (j=0; j<countA; j++) {
955       if (reuse == MAT_INITIAL_MATRIX) {
956         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
957         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
958       }
959       val[jj++] = v1[j];
960     }
961 
962     /* B-part */
963     for (j=0; j < countB; j++) {
964       if (garray[bjj[j]] > rstart) {
965         if (reuse == MAT_INITIAL_MATRIX) {
966           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
967           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
968         }
969         val[jj++] = v2[j];
970       }
971     }
972     irow++;
973   }
974   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
975   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
/*
  Destroy a MUMPS factor matrix: free all PETSc-side storage (COO triples, scatters,
  sequential work vectors, Schur data), terminate the MUMPS instance with JOB_END,
  then detach all composed MatMumps* query/set functions from A.
*/
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  /* Tell MUMPS to release its internal storage for this instance */
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    /* Free the OpenMP sub-communicator controller and the distributed-RHS staging buffers */
    ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
    ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr);
  }
#endif
  ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
  ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr);
  /* Return the (reference-counted) communicator duplicated for MUMPS' use */
  if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);}
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1029 
/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor.
   Fills mumps->id.rhs_loc/irhs_loc/nloc_rhs/lrhs_loc/nrhs. With OpenMP support enabled
   (omp_comm_size > 1), the RHS of all ranks in the omp sub-communicator is gathered onto
   the omp master rank before being handed to MUMPS. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  PetscErrorCode     ierr;
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt  ompsize=mumps->omp_comm_size;
  PetscInt           i,m,M,rstart;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* No omp aggregation: each rank hands MUMPS its own rows directly */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
  #if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt  *ranges;
    PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group       petsc_group,omp_group;
    PetscScalar     *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
        ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
        /* Map omp-communicator ranks to their ranks in the petsc communicator */
        ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr);
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr);

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
        ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr);
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
        ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    /* Gather every omp rank's portion of the RHS onto the omp master (omp rank 0) */
    ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
    ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr);

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            /* each rank's rows are interleaved into the column-major multi-RHS layout */
            ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
  #endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs; /* leading dimension of rhs_loc */
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}
1128 
/*
  Solve A x = b using the computed MUMPS factor stored in A->data.

  Sequential: b is copied into x and MUMPS solves in place through id.rhs.
  Parallel: the RHS is either given to MUMPS distributed (ICNTL(20)=10) or
  centralized on rank 0 (ICNTL(20)=0) via scat_rhs; the distributed solution
  (isol_loc/sol_loc) is then scattered back into the PETSc vector x.
  When Schur indices were provided but ICNTL(26) is not 1 or 2, the full system
  is solved via an extra condensation/expansion pair around the solve.
*/
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar  *rarray = NULL;
  PetscScalar        *array;
  IS                 is_iden,is_petsc;
  PetscErrorCode     ierr;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  /* A failed factorization cannot produce a solution; flag x with Inf and return */
  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
      ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      if (!mumps->myid) {
        /* only the MUMPS host reads id.rhs */
        ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    /* copy b into x; MUMPS overwrites the rhs array with the solution in place */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  /* release the arrays acquired during RHS setup above */
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
    } else if (!mumps->myid) {
      ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
    }
  } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1227 
1228 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1229 {
1230   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   mumps->id.ICNTL(9) = 0;
1235   ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
1236   mumps->id.ICNTL(9) = 1;
1237   PetscFunctionReturn(0);
1238 }
1239 
/*
  Solve A X = B for multiple right-hand sides with the MUMPS factor in A->data.

  X must be MATDENSE. B is either dense (same row distribution as X) or a
  MATTRANSPOSEMAT wrapping an AIJ matrix, which MUMPS consumes as a sparse RHS
  in compressed-column form (ICNTL(20)=1).

  Sequential path: X doubles as the in-place dense RHS/solution buffer.
  Parallel path: the RHS is centralized on the host (or given distributed when
  ICNTL20==10), and the distributed solution (sol_loc/isol_loc) is scattered
  back into X via a temporary vector v_mpi that aliases X's array.
*/
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;           /* shadowed by a local of the same name in the sequential branch */
  PetscInt          spnr,*ia,*ja;  /* likewise shadowed below */
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actural RHS matrix,
                 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M; /* leading dimension of the (centralized) dense RHS */
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    /* X's array serves as both RHS and solution: MUMPS solves in place */
    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      /* hand Bt's CSR (1-based) to MUMPS as the compressed-column sparse RHS */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
        very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
        0, re-arrange B into desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
        ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++) {
          for (j=0; j<nrhs; j++) {
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
      } else {
        /* non-host ranks contribute their pieces but receive nothing */
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
      }
      ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = ISDestroy(&is_to);CHKERRQ(ierr);
      ierr = ISDestroy(&is_from);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      if (!mumps->myid) { /* define rhs on the host */
        ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)bray;
        ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    /* only the host passes the sparse RHS to MUMPS; other ranks zero their fields */
    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* find which process owns row isol_loc[i] of X */
    for (proc=0; proc<mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  /* restore the sol_loc/isol_loc buffers single-RHS MatSolve() relies on */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    /* b_seq/scat_rhs were only created on the centralized-RHS path */
    if (mumps->ICNTL20 == 0) {
      ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
    }
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1487 
1488 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1489 {
1490   PetscErrorCode ierr;
1491   PetscBool      flg;
1492   Mat            B;
1493 
1494   PetscFunctionBegin;
1495   ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
1496   if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1497 
1498   /* Create B=Bt^T that uses Bt's data structure */
1499   ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
1500 
1501   ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
1502   ierr = MatDestroy(&B);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #if !defined(PETSC_USE_COMPLEX)
1507 /*
1508   input:
1509    F:        numeric factor
1510   output:
1511    nneg:     total number of negative pivots
1512    nzero:    total number of zero pivots
1513    npos:     (global dimension of F) - nneg - nzero
1514 */
1515 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1516 {
1517   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1518   PetscErrorCode ierr;
1519   PetscMPIInt    size;
1520 
1521   PetscFunctionBegin;
1522   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr);
1523   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1524   if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13));
1525 
1526   if (nneg) *nneg = mumps->id.INFOG(12);
1527   if (nzero || npos) {
1528     if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1529     if (nzero) *nzero = mumps->id.INFOG(28);
1530     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1531   }
1532   PetscFunctionReturn(0);
1533 }
1534 #endif
1535 
/*
  Gathers the matrix triples (irn/jcn/val) from all ranks of mumps->omp_comm
  onto the omp master rank, used when PETSc's OpenMP support mode is active
  (omp_comm_size > 1) so that only the master passes the matrix to MUMPS.

  reuse == MAT_INITIAL_MATRIX: gather per-rank nnz counts, reallocate enlarged
    irn/jcn/val on the master, then transfer indices and values.
  reuse == MAT_REUSE_MATRIX: indices are already in place on the master; only
    the numerical values are re-gathered.

  Because MPI message counts are 'int' while nnz counts are PetscInt64,
  each transfer is sliced into chunks of at most PETSC_MPI_INT_MAX entries.
*/
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
      ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr);

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        /* ceil(nnz/PETSC_MPI_INT_MAX) chunks are needed to send this rank's triples */
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
        ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);

        /* Self communication */
        ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
        ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication: post chunked receives; irn/jcn/val advance past each chunk */
        for (i=1; i<osize; i++) {
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            ierr    = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            irn    += count;
            jcn    += count;
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* Non-master ranks send their own triples to the master in matching chunks */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr    = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          ierr    = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          ierr    = MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          irn    += count;
          jcn    += count;
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: only values are transferred; structure was gathered previously */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr    = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(0);
}
1654 
1655 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1656 {
1657   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1658   PetscErrorCode ierr;
1659   PetscBool      isMPIAIJ;
1660 
1661   PetscFunctionBegin;
1662   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1663     if (mumps->id.INFOG(1) == -6) {
1664       ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1665     }
1666     ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1667     PetscFunctionReturn(0);
1668   }
1669 
1670   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
1671   ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);
1672 
1673   /* numerical factorization phase */
1674   /*-------------------------------*/
1675   mumps->id.job = JOB_FACTNUMERIC;
1676   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1677     if (!mumps->myid) {
1678       mumps->id.a = (MumpsScalar*)mumps->val;
1679     }
1680   } else {
1681     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1682   }
1683   PetscMUMPS_c(mumps);
1684   if (mumps->id.INFOG(1) < 0) {
1685     if (A->erroriffailure) {
1686       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1687     } else {
1688       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1689         ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1690         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1691       } else if (mumps->id.INFOG(1) == -13) {
1692         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1693         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1694       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1695         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1696         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1697       } else {
1698         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1699         F->factorerrortype = MAT_FACTOR_OTHER;
1700       }
1701     }
1702   }
1703   if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16));
1704 
1705   F->assembled    = PETSC_TRUE;
1706 
1707   if (F->schur) { /* reset Schur status to unfactored */
1708 #if defined(PETSC_HAVE_CUDA)
1709     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1710 #endif
1711     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1712       mumps->id.ICNTL(19) = 2;
1713       ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
1714     }
1715     ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
1716   }
1717 
1718   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1719   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1720 
1721   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1722   if (mumps->petsc_size > 1) {
1723     PetscInt    lsol_loc;
1724     PetscScalar *sol_loc;
1725 
1726     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);
1727 
1728     /* distributed solution; Create x_seq=sol_loc for repeated use */
1729     if (mumps->x_seq) {
1730       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1731       ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
1732       ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
1733     }
1734     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1735     ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
1736     mumps->id.lsol_loc = lsol_loc;
1737     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1738     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
1739   }
1740   ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 /* Sets MUMPS options from the options database */
1745 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1746 {
1747   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1748   PetscErrorCode ierr;
1749   PetscMUMPSInt  icntl=0;
1750   PetscInt       info[80],i,ninfo=80;
1751   PetscBool      flg=PETSC_FALSE;
1752 
1753   PetscFunctionBegin;
1754   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
1755   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
1756   if (flg) mumps->id.ICNTL(1) = icntl;
1757   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
1758   if (flg) mumps->id.ICNTL(2) = icntl;
1759   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
1760   if (flg) mumps->id.ICNTL(3) = icntl;
1761 
1762   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
1763   if (flg) mumps->id.ICNTL(4) = icntl;
1764   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
1765 
1766   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
1767   if (flg) mumps->id.ICNTL(6) = icntl;
1768 
1769   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
1770   if (flg) {
1771     if (icntl == 1 || icntl < 0 || icntl > 7) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1772     mumps->id.ICNTL(7) = icntl;
1773   }
1774 
1775   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
1776   /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */
1777   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
1778   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
1779   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
1780   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
1781   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
1782   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
1783   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1784     ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
1785     ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
1786   }
1787 
1788   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1789      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1790      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1791      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1792      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1793      In short, we could not use distributed RHS with MPICH until v4.0b1.
1794    */
1795 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1796   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1797 #else
1798   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1799 #endif
1800   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr);
1801   if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1802 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1803   if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1804 #endif
1805   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */
1806 
1807   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
1808   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
1809   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
1810   if (mumps->id.ICNTL(24)) {
1811     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1812   }
1813 
1814   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
1815   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
1816   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
1817   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
1818   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
1819   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
1820   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
1821   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
1822   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
1823   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
1824   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
1825   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
1826 
1827   ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
1828   ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
1829   ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
1830   ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
1831   ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
1832   ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);
1833 
1834   ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr);
1835 
1836   ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
1837   if (ninfo) {
1838     if (PetscUnlikely(ninfo > 80)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo);
1839     ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
1840     mumps->ninfo = ninfo;
1841     for (i=0; i<ninfo; i++) {
1842       if (PetscUnlikely(info[i] < 0 || info[i]>80)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo);
1843       else  mumps->info[i] = info[i];
1844     }
1845   }
1846 
1847   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
/*
  One-time initialization of the MUMPS instance stored in mumps:
  sets up the petsc/omp/mumps communicators (optionally via PETSc's OpenMP
  controller), runs MUMPS JOB_INIT, broadcasts the MUMPS-chosen defaults to
  ranks that do not call MUMPS themselves, and overrides a few defaults with
  PETSc's preferred settings.
*/
PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */

  /* -mat_mumps_use_omp_threads may appear with or without an integer value,
     hence the HasName check followed by the optional GetInt */
  ierr = PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
  ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
#endif
  } else {
    /* no OpenMP support mode: every rank is its own omp "master" and MUMPS runs on the full comm */
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr);
  mumps->reqs = NULL;
  mumps->tag  = 0;

  /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
  if (mumps->mumps_comm != MPI_COMM_NULL) {
    ierr =  PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);
  }

  /* MUMPS has a Fortran interface, so hand it a Fortran communicator handle */
  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1;  /* host participates factorizaton and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1));

  /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
   */
  ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm);CHKERRMPI(ierr);
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr);

  mumps->scat_rhs = NULL;
  mumps->scat_sol = NULL;

  /* set PETSc-MUMPS default options - override MUMPS default */
  mumps->id.ICNTL(3) = 0; /* silence MUMPS host output; restored by -mat_mumps_icntl_4 or -info */
  mumps->id.ICNTL(4) = 0;
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
    mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
  } else {
    mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
    mumps->id.ICNTL(21) = 1;   /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur    = 0;
  mumps->id.listvar_schur = NULL;
  mumps->id.schur         = NULL;
  mumps->sizeredrhs       = 0;
  mumps->schur_sol        = NULL;
  mumps->schur_sizesol    = 0;
  PetscFunctionReturn(0);
}
1922 
1923 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1924 {
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   if (mumps->id.INFOG(1) < 0) {
1929     if (A->erroriffailure) {
1930       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1931     } else {
1932       if (mumps->id.INFOG(1) == -6) {
1933         ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1934         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1935       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1936         ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1937         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1938       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
1939         ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
1940       } else {
1941         ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1942         F->factorerrortype = MAT_FACTOR_OTHER;
1943       }
1944     }
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
/*
  Symbolic LU factorization for AIJ matrices via MUMPS (JOB_FACTSYMBOLIC).

  Converts A to the triple format (centralized if ICNTL(18)==0, distributed if
  ICNTL(18)==3), optionally passes the row permutation r as a user-supplied
  ordering (ICNTL(7)=1), runs the MUMPS analysis phase and installs the
  numeric/solve function pointers on F. The c permutation is unused here.
*/
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
    PetscFunctionReturn(0);
  }

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      /* ICNTL(6)>1 means MUMPS may use the numerical values during analysis (scaling/permutation) */
      if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
      if (r) {
        mumps->id.ICNTL(7) = 1; /* use the user-provided ordering r instead of an internal one */
        /* NOTE(review): this inner !mumps->myid test is redundant — we are already inside the rank-0 branch */
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt       i;

          ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr);
          ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);
          for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */
          ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
        }
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
      ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
      ierr = VecDestroy(&b);CHKERRQ(ierr);
    }
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  /* install the MUMPS implementations of the numeric/solve operations on F */
  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  F->ops->matsolve        = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(0);
}
2017 
2018 /* Note the Petsc r and c permutations are ignored */
2019 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2020 {
2021   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2022   PetscErrorCode ierr;
2023   Vec            b;
2024   const PetscInt M = A->rmap->N;
2025 
2026   PetscFunctionBegin;
2027   if (mumps->matstruc == SAME_NONZERO_PATTERN) {
2028     /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */
2029     PetscFunctionReturn(0);
2030   }
2031 
2032   /* Set MUMPS options from the options database */
2033   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2034 
2035   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2036   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2037 
2038   /* analysis phase */
2039   /*----------------*/
2040   mumps->id.job = JOB_FACTSYMBOLIC;
2041   mumps->id.n   = M;
2042   switch (mumps->id.ICNTL(18)) {
2043   case 0:  /* centralized assembled matrix input */
2044     if (!mumps->myid) {
2045       mumps->id.nnz = mumps->nnz;
2046       mumps->id.irn = mumps->irn;
2047       mumps->id.jcn = mumps->jcn;
2048       if (mumps->id.ICNTL(6)>1) {
2049         mumps->id.a = (MumpsScalar*)mumps->val;
2050       }
2051     }
2052     break;
2053   case 3:  /* distributed assembled matrix input (size>1) */
2054     mumps->id.nnz_loc = mumps->nnz;
2055     mumps->id.irn_loc = mumps->irn;
2056     mumps->id.jcn_loc = mumps->jcn;
2057     if (mumps->id.ICNTL(6)>1) {
2058       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2059     }
2060     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2061       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2062       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2063       ierr = VecDestroy(&b);CHKERRQ(ierr);
2064     }
2065     break;
2066   }
2067   PetscMUMPS_c(mumps);
2068   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2069 
2070   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2071   F->ops->solve           = MatSolve_MUMPS;
2072   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2073 
2074   mumps->matstruc = SAME_NONZERO_PATTERN;
2075   PetscFunctionReturn(0);
2076 }
2077 
/* Symbolic Cholesky factorization via MUMPS (analysis phase, JOB=1).
   Note the Petsc r permutation and factor info are ignored */
PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  if (mumps->matstruc == SAME_NONZERO_PATTERN) {
    /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS(); nothing to redo */
    PetscFunctionReturn(0);
  }

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  /* Convert A into (irn,jcn,val) triples and, for centralized input, gather them on rank 0 */
  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input: only the host supplies the structure */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
      ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
      ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
      ierr = VecDestroy(&b);CHKERRQ(ierr);
    }
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS; /* factor is symmetric, so solve == transpose solve */
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL; /* inertia is only meaningful for real symmetric factorizations */
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif

  mumps->matstruc = SAME_NONZERO_PATTERN;
  PetscFunctionReturn(0);
}
2144 
/* Print MUMPS run parameters (ICNTL/CNTL) and statistics (RINFO/INFO/RINFOG/INFOG)
   to an ASCII viewer when the PETSC_VIEWER_ASCII_INFO format is requested. */
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      /* Control parameters chosen for this factorization */
      ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr);
      if (mumps->id.ICNTL(11)>0) {
        /* Error-analysis outputs are only populated when ICNTL(11) was enabled */
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr);
      /* ICNTL(15-17) not used */
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors is discarded in the solve phase):    %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);

      /* information local to each processor; printed rank-by-rank via synchronized printf */
      ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      /* extra user-requested INFO entries (stored in mumps->info by the options handling) */
      if (mumps->ninfo && mumps->ninfo <= 80) {
        PetscInt i;
        for (i=0; i<mumps->ninfo; i++) {
          ierr = PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
        }
      }
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);

      if (!mumps->myid) { /* information from the host */
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);

        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
2292 
2293 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2294 {
2295   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2296 
2297   PetscFunctionBegin;
2298   info->block_size        = 1.0;
2299   info->nz_allocated      = mumps->id.INFOG(20);
2300   info->nz_used           = mumps->id.INFOG(20);
2301   info->nz_unneeded       = 0.0;
2302   info->assemblies        = 0.0;
2303   info->mallocs           = 0.0;
2304   info->memory            = 0.0;
2305   info->fill_ratio_given  = 0;
2306   info->fill_ratio_needed = 0;
2307   info->factor_mallocs    = 0;
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 /* -------------------------------------------------------------------------------------------*/
/* Register with MUMPS the index set defining the Schur complement block.
   Only the sequential case or a parallel run where the Schur indices live
   entirely on the root rank is supported. */
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
  }

  /* Schur complement matrix: a size x size sequential dense matrix whose
     storage is handed to MUMPS (id.schur) to be filled during factorization */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size; /* leading dimension equals the Schur size (no padding) */
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    /* sym==1 means MUMPS was told the matrix is SPD; propagate that to the Schur block */
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}
2361 
2362 /* -------------------------------------------------------------------------------------------*/
/* Copy the Schur complement computed by MUMPS (id.schur, layout governed by
   ICNTL(19)) into a new column-major sequential dense PETSc matrix *S.
   The caller owns *S and must destroy it. */
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit, to rebuild PetscScalar from MUMPS {r,i} pairs */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows: transpose into PETSc's column-major array */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val;
        }
      }
    } else { /* stored by columns: layouts match, straight copy */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns: mirror into both triangles */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows: mirror into both triangles */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St;
  PetscFunctionReturn(0);
}
2431 
2432 /* -------------------------------------------------------------------------------------------*/
2433 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2434 {
2435   PetscErrorCode ierr;
2436   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2437 
2438   PetscFunctionBegin;
2439   ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2444 {
2445   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2446 
2447   PetscFunctionBegin;
2448   *ival = mumps->id.ICNTL(icntl);
2449   PetscFunctionReturn(0);
2450 }
2451 
/*@
  MatMumpsSetIcntl - Set MUMPS integer control parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array ICNTL()
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival> - set ICNTL(icntl) to ival

   Level: beginner

   Notes:
   Consult the MUMPS Users' Guide for the meaning of each ICNTL entry.

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
 @*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* only factor matrices carry a MUMPS context */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  /* dispatch to the MUMPS implementation; a no-op for non-MUMPS factors */
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2484 
/*@
  MatMumpsGetIcntl - Get MUMPS integer control parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
.  ival - value of MUMPS ICNTL(icntl)

   Level: beginner

   Notes:
   Consult the MUMPS Users' Guide for the meaning of each ICNTL entry.

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* only factor matrices carry a MUMPS context */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidIntPointer(ival,3);
  /* errors if the factor is not a MUMPS factor (UseMethod, not TryMethod) */
  ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2516 
2517 /* -------------------------------------------------------------------------------------------*/
2518 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2519 {
2520   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2521 
2522   PetscFunctionBegin;
2523   mumps->id.CNTL(icntl) = val;
2524   PetscFunctionReturn(0);
2525 }
2526 
/* Plugin implementation behind MatMumpsGetCntl(): read entry icntl of the MUMPS real control array CNTL().
   Note: icntl is not range-checked here; callers must pass a valid CNTL index. */
PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *val = mumps->id.CNTL(icntl);
  PetscFunctionReturn(0);
}
2535 
2536 /*@
2537   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2538 
2539    Logically Collective on Mat
2540 
2541    Input Parameters:
2542 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2543 .  icntl - index of MUMPS parameter array CNTL()
2544 -  val - value of MUMPS CNTL(icntl)
2545 
2546   Options Database:
2547 .   -mat_mumps_cntl_<icntl> <val>
2548 
2549    Level: beginner
2550 
2551    References:
2552 .     MUMPS Users' Guide
2553 
2554 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2555 @*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* CNTL() values only exist on a factored matrix obtained from MatGetFactor() */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);  /* both icntl and val must match on all ranks */
  PetscValidLogicalCollectiveReal(F,val,3);
  /* PetscTryMethod is a no-op when F is not a MUMPS factor, so setting is best-effort */
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2568 
2569 /*@
2570   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2571 
2572    Logically Collective on Mat
2573 
2574    Input Parameters:
2575 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2576 -  icntl - index of MUMPS parameter array CNTL()
2577 
2578   Output Parameter:
2579 .  val - value of MUMPS CNTL(icntl)
2580 
2581    Level: beginner
2582 
2583    References:
2584 .      MUMPS Users' Guide
2585 
2586 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2587 @*/
PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* CNTL() values only exist on a factored matrix obtained from MatGetFactor() */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2); /* icntl must be the same on all ranks (logically collective) */
  PetscValidRealPointer(val,3);
  /* Dispatch to the implementation composed on the factor; PetscUseMethod errors if F is not a MUMPS factor */
  ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2600 
/* Plugin implementation behind MatMumpsGetInfo(): read entry icntl of the per-process MUMPS INFO() array.
   Note: icntl is not range-checked here; callers must pass a valid INFO index. */
PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(0);
}
2609 
/* Plugin implementation behind MatMumpsGetInfog(): read entry icntl of the global MUMPS INFOG() array.
   Note: icntl is not range-checked here; callers must pass a valid INFOG index. */
PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(0);
}
2618 
/* Plugin implementation behind MatMumpsGetRinfo(): read entry icntl of the per-process MUMPS RINFO() array.
   Note: icntl is not range-checked here; callers must pass a valid RINFO index. */
PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(0);
}
2627 
/* Plugin implementation behind MatMumpsGetRinfog(): read entry icntl of the global MUMPS RINFOG() array.
   Note: icntl is not range-checked here; callers must pass a valid RINFOG index. */
PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(0);
}
2636 
/* Plugin implementation behind MatMumpsGetInverse(): compute the entries of inv(A) requested
   through the sparsity pattern of spRHS, using the MUMPS ICNTL(30) sparse-inverse feature.
   spRHS must be a MATTRANSPOSEMAT wrapper; the wrapped matrix supplies the sparse RHS in CSR form. */
PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;
  Mat            Bt = NULL,Btseq = NULL;
  PetscBool      flg;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *aa;
  PetscInt       spnr,*ia,*ja,M,nrhs;

  PetscFunctionBegin;
  PetscValidPointer(spRHS,2);
  ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix");

  /* ICNTL(30): compute a user-specified set of entries in inv(A) */
  ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);

  if (mumps->petsc_size > 1) {
    /* NOTE(review): assumes Bt is MPIAIJ here; only its local diagonal block is handed to MUMPS below */
    Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
    Btseq = b->A;
  } else {
    Btseq = Bt;
  }

  ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;  /* dense RHS unused; the sparse RHS fields below are used instead */

  if (!mumps->myid) {
    /* Only the host rank (rank 0 in MUMPS' communicator) supplies the sparse RHS */
    ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
    /* shift==1: MUMPS expects 1-based CSR indices */
    ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
    /* Cast/copy PetscInt CSR arrays into PetscMUMPSInt arrays as needed */
    ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
    mumps->id.rhs_sparse  = (MumpsScalar*)aa;
  } else {
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
  mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0)
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));

  if (!mumps->myid) {
    /* Restore the host rank's CSR arrays/values; requested inverse entries were written in place */
    ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
  }
  PetscFunctionReturn(0);
}
2696 
2697 /*@
2698   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2699 
2700    Logically Collective on Mat
2701 
2702    Input Parameters:
2703 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2704 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2705 
2706   Output Parameter:
2707 . spRHS - requested entries of inverse of A
2708 
2709    Level: beginner
2710 
2711    References:
2712 .      MUMPS Users' Guide
2713 
2714 .seealso: MatGetFactor(), MatCreateTranspose()
2715 @*/
PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* Requires a factored matrix obtained from MatGetFactor() */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  /* spRHS (pointer validity and MATTRANSPOSEMAT type) is validated inside the composed method */
  ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2726 
2727 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2728 {
2729   PetscErrorCode ierr;
2730   Mat            spRHS;
2731 
2732   PetscFunctionBegin;
2733   ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
2734   ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
2735   ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 /*@
2740   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2741 
2742    Logically Collective on Mat
2743 
2744    Input Parameters:
2745 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2746 -  spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2747 
2748   Output Parameter:
2749 . spRHST - requested entries of inverse of A^T
2750 
2751    Level: beginner
2752 
2753    References:
2754 .      MUMPS Users' Guide
2755 
2756 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2757 @*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* Requires a factored matrix obtained from MatGetFactor() */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  /* Unlike MatMumpsGetInverse(), the RHS pattern here is a plain (Seq/MPI)AIJ matrix holding entries of A^T */
  ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");

  ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2772 
2773 /*@
2774   MatMumpsGetInfo - Get MUMPS parameter INFO()
2775 
2776    Logically Collective on Mat
2777 
2778    Input Parameters:
2779 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2780 -  icntl - index of MUMPS parameter array INFO()
2781 
2782   Output Parameter:
2783 .  ival - value of MUMPS INFO(icntl)
2784 
2785    Level: beginner
2786 
2787    References:
2788 .      MUMPS Users' Guide
2789 
2790 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2791 @*/
2792 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2793 {
2794   PetscErrorCode ierr;
2795 
2796   PetscFunctionBegin;
2797   PetscValidType(F,1);
2798   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2799   PetscValidIntPointer(ival,3);
2800   ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2801   PetscFunctionReturn(0);
2802 }
2803 
2804 /*@
2805   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2806 
2807    Logically Collective on Mat
2808 
2809    Input Parameters:
2810 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2811 -  icntl - index of MUMPS parameter array INFOG()
2812 
2813   Output Parameter:
2814 .  ival - value of MUMPS INFOG(icntl)
2815 
2816    Level: beginner
2817 
2818    References:
2819 .      MUMPS Users' Guide
2820 
2821 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2822 @*/
2823 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2824 {
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   PetscValidType(F,1);
2829   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2830   PetscValidIntPointer(ival,3);
2831   ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2832   PetscFunctionReturn(0);
2833 }
2834 
2835 /*@
2836   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2837 
2838    Logically Collective on Mat
2839 
2840    Input Parameters:
2841 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2842 -  icntl - index of MUMPS parameter array RINFO()
2843 
2844   Output Parameter:
2845 .  val - value of MUMPS RINFO(icntl)
2846 
2847    Level: beginner
2848 
2849    References:
2850 .       MUMPS Users' Guide
2851 
2852 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2853 @*/
2854 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2855 {
2856   PetscErrorCode ierr;
2857 
2858   PetscFunctionBegin;
2859   PetscValidType(F,1);
2860   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2861   PetscValidRealPointer(val,3);
2862   ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2863   PetscFunctionReturn(0);
2864 }
2865 
2866 /*@
2867   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2868 
2869    Logically Collective on Mat
2870 
2871    Input Parameters:
2872 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2873 -  icntl - index of MUMPS parameter array RINFOG()
2874 
2875   Output Parameter:
2876 .  val - value of MUMPS RINFOG(icntl)
2877 
2878    Level: beginner
2879 
2880    References:
2881 .      MUMPS Users' Guide
2882 
2883 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2884 @*/
2885 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2886 {
2887   PetscErrorCode ierr;
2888 
2889   PetscFunctionBegin;
2890   PetscValidType(F,1);
2891   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2892   PetscValidRealPointer(val,3);
2893   ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2894   PetscFunctionReturn(0);
2895 }
2896 
2897 /*MC
2898   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2899   distributed and sequential matrices via the external package MUMPS.
2900 
2901   Works with MATAIJ and MATSBAIJ matrices
2902 
2903   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2904 
2905   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2906 
2907   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2908 
2909   Options Database Keys:
2910 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2911 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2912 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2913 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2914 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2915 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2916                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2917 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2918 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2919 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2920 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2921 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2922 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2923 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2924 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2925 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2927 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2928 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
2929 .  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix
.  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2931 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2932 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2933 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2934 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2935 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2936 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2937 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2938 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2939 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2940 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2941 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2942 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2943 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2944 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2945                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2946 
2947   Level: beginner
2948 
2949     Notes:
2950     MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
2951 
2952     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2953 $          KSPGetPC(ksp,&pc);
2954 $          PCFactorGetMatrix(pc,&mat);
2955 $          MatMumpsGetInfo(mat,....);
2956 $          MatMumpsGetInfog(mat,....); etc.
2957            Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2958 
2959   Using MUMPS with 64-bit integers
2960     MUMPS provides 64-bit integer support in two build modes:
2961       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2962       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2963 
2964       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2965       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2966       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2967       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2968 
    With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2970 
2971   Two modes to run MUMPS/PETSc with OpenMP
2972 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2973 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2974 
2975 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2976 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2977 
2978    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2979    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2980    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2981    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2983 
2984    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2985    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2986    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2987    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
2988    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2989    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
2990    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
2991    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
2992    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
2993    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2994    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2995    examine the mapping result.
2996 
2997    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2998    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2999    calls omp_set_num_threads(m) internally before calling MUMPS.
3000 
3001    References:
3002 +   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
3003 -   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
3004 
3005 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()
3006 
3007 M*/
3008 
/* Report the solver-package name (MATSOLVERMUMPS) for factors created by this plugin */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}
3015 
/* MatGetFactor for Seq and MPI AIJ matrices: create an (unfactored) MUMPS factor matrix B
   for A, register all MUMPS-specific methods on it, and initialize the MUMPS instance. */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  /* MUMPS Cholesky does not support (complex) Hermitian matrices, only complex symmetric ones */
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods that the public MatMumpsXxx() wrappers dispatch to */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* choose the converter that flattens A into the triples format MUMPS ingests */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
    mumps->sym = 0;  /* MUMPS SYM=0: unsymmetric factorization */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    /* Cholesky on an AIJ matrix: convert only one triangle (sbaij-style triples) */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;  /* complex: always general symmetric (SYM=2); SPD (SYM=1) not used */
#else
    if (A->spd_set && A->spd) mumps->sym = 1;  /* MUMPS SYM=1: symmetric positive definite */
    else                      mumps->sym = 2;  /* MUMPS SYM=2: general symmetric */
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(0);
}
3093 
/* MatGetFactor for Seq and MPI SBAIJ matrices: create a MUMPS Cholesky factor matrix for A
   (SBAIJ stores one triangle, so only Cholesky is offered) and register the MUMPS methods. */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  /* MUMPS Cholesky does not support (complex) Hermitian matrices, only complex symmetric ones */
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  /* choose the converter that flattens A into the triples format MUMPS ingests */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific methods that the public MatMumpsXxx() wrappers dispatch to */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;  /* complex: always general symmetric (SYM=2); SPD (SYM=1) not used */
#else
  if (A->spd_set && A->spd) mumps->sym = 1;  /* MUMPS SYM=1: symmetric positive definite */
  else                      mumps->sym = 2;  /* MUMPS SYM=2: general symmetric */
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(0);
}
3164 
/*
  MatGetFactor_baij_mumps - Create a MUMPS factor matrix B for a SeqBAIJ or
  MPIBAIJ input matrix A.

  Only MAT_FACTOR_LU is supported for BAIJ input; requesting Cholesky raises
  PETSC_ERR_SUP (use an SBAIJ or AIJ matrix instead).  The BAIJ storage is
  expanded to AIJ-style (row,col,val) triples before being passed to MUMPS.

  Input Parameters:
+ A     - the matrix to be factored (MATSEQBAIJ or MATMPIBAIJ)
- ftype - the requested factorization type (must be MAT_FACTOR_LU)

  Output Parameter:
. F - the factor matrix, of type "mumps", ready for symbolic factorization
*/
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  /* mumps is the per-factor solver context; it is stored in B->data below and
     released by MatDestroy_MUMPS */
  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* choose the BAIJ->triples conversion matching A's (sequential/parallel) layout */
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM parameter: 0 = unsymmetric factorization */
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");

  B->ops->view        = MatView_MUMPS;
  B->ops->getinfo     = MatGetInfo_MUMPS;

  /* expose the MUMPS-specific query/control API on the factor object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  /* first factorization must (re)build the triples from scratch */
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
  PetscFunctionReturn(0);
}
3225 
3226 /* MatGetFactor for Seq and MPI SELL matrices */
3227 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3228 {
3229   Mat            B;
3230   PetscErrorCode ierr;
3231   Mat_MUMPS      *mumps;
3232   PetscBool      isSeqSELL;
3233   PetscMPIInt    size;
3234 
3235   PetscFunctionBegin;
3236   /* Create the factorization matrix */
3237   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
3238   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3239   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3240   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3241   ierr = MatSetUp(B);CHKERRQ(ierr);
3242 
3243   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3244 
3245   B->ops->view        = MatView_MUMPS;
3246   B->ops->getinfo     = MatGetInfo_MUMPS;
3247 
3248   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3249   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3250   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3251   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3252   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3253   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3254   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3255   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3256   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3257   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3258   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3259 
3260   if (ftype == MAT_FACTOR_LU) {
3261     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3262     B->factortype            = MAT_FACTOR_LU;
3263     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3264     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3265     mumps->sym = 0;
3266     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3267   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3268 
3269   /* set solvertype */
3270   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3271   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3272   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3273   if (size == 1) {
3274     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3275     B->canuseordering = PETSC_TRUE;
3276   }
3277   B->ops->destroy = MatDestroy_MUMPS;
3278   B->data         = (void*)mumps;
3279 
3280   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3281 
3282   *F = B;
3283   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
3284   PetscFunctionReturn(0);
3285 }
3286 
/*
  MatSolverTypeRegister_MUMPS - Register MUMPS as a solver (MATSOLVERMUMPS)
  for the matrix types it supports.

  LU and Cholesky are registered for AIJ and BAIJ (Seq and MPI); SBAIJ gets
  Cholesky only, and SELL is limited to sequential LU.  Note the BAIJ Cholesky
  registration resolves to MatGetFactor_baij_mumps, which itself errors out
  with a pointer to SBAIJ/AIJ, so users get a helpful message rather than
  "no solver found".
*/
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
  ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
3305 
3306