xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 8ebe3e4e9e00d86ece2e9fcd0cc84910b0ad437c)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
54     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
58     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #endif
61 
/* MUMPS_INT is a 32-bit int here (full 64-bit MUMPS builds are rejected above), so it is exchanged as MPI_INT */
#define MPIU_MUMPSINT             MPI_INT
#define PETSC_MUMPS_INT_MAX       2147483647
/* Written as (-2147483647 - 1): the literal -2147483648 is parsed as -(2147483648), and 2147483648
   does not fit in an int, which would give the constant a wider type (long/long long) than intended */
#define PETSC_MUMPS_INT_MIN       (-2147483647 - 1)
65 
/* Cast PetscInt <a> to PetscMUMPSInt, storing the result in *b. Usually there is no overflow since
   <a> is row/col indices or some other small integer. With 64-bit PetscInt the value is range-checked
   (in debug builds, per PetscUnlikelyDebug) and an out-of-range error is raised if it does not fit. */
PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
{
  PetscFunctionBegin;
  /* With 32-bit PetscInt the check is compiled out entirely: the cast below is always lossless */
  if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  *b = (PetscMUMPSInt)(a);
  PetscFunctionReturn(0);
}
74 
/* Put these utility routines here since they are only used in this file */

/* Read a PetscMUMPSInt value from the options database.
   Same calling sequence as PetscOptionsInt_Private(), except that currentvalue, value and the
   bounds lb/ub are PetscMUMPSInt. The value read is range-checked through PetscMUMPSIntCast().
   *value is written only when the option was actually set; *set (if non-NULL) reports whether it was. */
PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
{
  PetscErrorCode ierr;
  PetscInt       myval;
  PetscBool      myset;
  PetscFunctionBegin;
  /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
  ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
  if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
  if (set) *set = myset;
  PetscFunctionReturn(0);
}
/* Convenience wrapper with the widest possible bounds; expects PetscOptionsObject in scope at the call site */
#define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
89 
90 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
91 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92 #define PetscMUMPS_c(mumps) \
93   do { \
94     if (mumps->use_petsc_omp_support) { \
95       if (mumps->is_omp_master) { \
96         ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
97         MUMPS_c(&mumps->id); \
98         ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
99       } \
100       ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
101       /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
102          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
103          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
104          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
105       */ \
106       ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
107       ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRMPI(ierr);\
108       ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRMPI(ierr);\
109     } else { \
110       MUMPS_c(&mumps->id); \
111     } \
112   } while (0)
113 #else
114 #define PetscMUMPS_c(mumps) \
115   do { MUMPS_c(&mumps->id); } while (0)
116 #endif
117 
118 /* declare MumpsScalar */
119 #if defined(PETSC_USE_COMPLEX)
120 #if defined(PETSC_USE_REAL_SINGLE)
121 #define MumpsScalar mumps_complex
122 #else
123 #define MumpsScalar mumps_double_complex
124 #endif
125 #else
126 #define MumpsScalar PetscScalar
127 #endif
128 
129 /* macros s.t. indices match MUMPS documentation */
130 #define ICNTL(I) icntl[(I)-1]
131 #define CNTL(I) cntl[(I)-1]
132 #define INFOG(I) infog[(I)-1]
133 #define INFO(I) info[(I)-1]
134 #define RINFOG(I) rinfog[(I)-1]
135 #define RINFO(I) rinfo[(I)-1]
136 
typedef struct Mat_MUMPS Mat_MUMPS;
/* Private data of a MUMPS factorization matrix. Holds the MUMPS problem instance (id), the
   matrix in triplet form, Schur-complement work arrays, and the PETSc/OpenMP support state. */
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;                    /* the MUMPS instance; variant chosen to match PetscScalar's precision/complexity */
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;              /* NOTE(review): presumably tracks whether the nonzero pattern changed between factorizations -- confirm */
  PetscMPIInt    myid,petsc_size;       /* MPI rank and size (presumably of petsc_comm) -- TODO confirm */
  PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;                   /* NOTE(review): looks like the MUMPS SYM parameter (0/1/2) -- verify against usage elsewhere in the file */
  MPI_Comm       mumps_comm;            /* communicator on which MUMPS is invoked */
  PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;               /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;
  PetscScalar    *rhs_loc,*rhs_recvbuf;
#endif
  Vec            b_seq,x_seq;           /* sequential rhs/solution vectors -- presumably for the centralized-RHS path; confirm */
  PetscInt       ninfo,*info;           /* which INFO to display */
  PetscInt       sizeredrhs;            /* allocated size of id.redrhs (see MatMumpsHandleSchur_Private) */
  PetscScalar    *schur_sol;            /* buffer for the Schur solve when the Schur complement is stored inverted (see MatMumpsSolveSchur_Private) */
  PetscInt       schur_sizesol;         /* allocated size of schur_sol */
  PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*); /* format-specific matrix -> triplets converter */

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;              /* an OpenMP controler that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
  MPI_Request    *reqs;
};
187 
/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.
   With 32-bit PetscInt the input arrays are returned as-is; with 64-bit PetscInt the entries are
   copied (with per-entry range checks) into the persistent buffers mumps->ia_alloc/ja_alloc, which
   are grown on demand and reused across calls.
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    /* copy entry by entry so every index is range-checked during the narrowing cast */
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++)    {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia; /* PetscInt and PetscMUMPSInt are both 32-bit here, so the arrays can be used directly */
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
222 
223 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
224 {
225   PetscErrorCode ierr;
226 
227   PetscFunctionBegin;
228   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
229   ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
230   ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
231   mumps->id.size_schur = 0;
232   mumps->id.schur_lld  = 0;
233   mumps->id.ICNTL(19)  = 0;
234   PetscFunctionReturn(0);
235 }
236 
/* Solve with the Schur complement: the rhs is taken from mumps->id.redrhs and the solution is
   returned in the same location (the dense matrix B below aliases id.redrhs, so writing the
   result into B writes it into redrhs). Behavior depends on how the Schur complement S is stored:
   - MAT_FACTOR_SCHUR_FACTORED: triangular solves with the factored S;
   - MAT_FACTOR_SCHUR_INVERTED: multiply by the explicitly inverted S, then copy back into B. */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps id.redrhs without copying: solutions written to B land directly in redrhs */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X also aliases id.redrhs, so the in-place solve B -> X updates redrhs */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds inv(Schur); compute X = inv(S)*B (or transpose) into a separate buffer, then copy back */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* copy the product back into redrhs (via B's alias) */
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %d",F->schur_status);
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
299 
/* Drive the MUMPS Schur reduction protocol around a solve.
   Called twice per solve: once with expansion=PETSC_FALSE before the MUMPS solve (sets up
   id.redrhs and ICNTL(26)=1 for the condensation phase), and once with expansion=PETSC_TRUE
   afterwards (solves the Schur system ourselves, then runs MUMPS with ICNTL(26)=2 to expand
   the reduced solution back to the full space). No-op if no Schur complement was requested. */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
336 
337 /*
338   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
339 
340   input:
341     A       - matrix in aij,baij or sbaij format
342     shift   - 0: C style output triple; 1: Fortran style output triple.
343     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
344               MAT_REUSE_MATRIX:   only the values in v array are updated
345   output:
346     nnz     - dim of r, c, and v (number of local nonzero entries of A)
347     r, c, v - row and col index, matrix values (matrix triples)
348 
349   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
350   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
351   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
352 
353  */
354 
355 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
356 {
357   const PetscScalar *av;
358   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
359   PetscInt64        nz,rnz,i,j,k;
360   PetscErrorCode    ierr;
361   PetscMUMPSInt     *row,*col;
362   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
363 
364   PetscFunctionBegin;
365   ierr       = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
366   mumps->val = (PetscScalar*)av;
367   if (reuse == MAT_INITIAL_MATRIX) {
368     nz   = aa->nz;
369     ai   = aa->i;
370     aj   = aa->j;
371     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
372     for (i=k=0; i<M; i++) {
373       rnz = ai[i+1] - ai[i];
374       ajj = aj + ai[i];
375       for (j=0; j<rnz; j++) {
376         ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
377         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
378         k++;
379       }
380     }
381     mumps->irn = row;
382     mumps->jcn = col;
383     mumps->nnz = nz;
384   }
385   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
/* Convert a SeqSELL matrix to MUMPS triples. The SELL value array is used in place; row indices
   are reconstructed from the sliced storage: entry j of slice i belongs to row 8*i+r, where r
   cycles 0..7 within the slice. NOTE(review): this hard-codes a slice height of 8 (the 8*i and
   (r+1)&0x07 expressions) -- confirm this matches the SELL implementation's slice height. Padded
   slots within a slice appear to be emitted as well; verify that is intended. */
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices]; /* total number of stored entries over all slices */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    /* column indices are already stored per entry; only a narrowing cast is needed */
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
413 
/* Convert a SeqBAIJ matrix to MUMPS triples, expanding each bs-by-bs block into bs2 scalar
   entries. The BAIJ value array is used in place (blocks are stored column-major, matching the
   (row=i*bs+m, col=bs*ajj[k]+j) index order generated below). */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  ierr       = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M          = A->rmap->N/bs; /* number of block rows (sequential matrix, so global N equals local n) */
  mumps->val = aa->a;
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz; /* every block contributes bs2 scalar entries */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {
          for (m=0; m<bs; m++) {
            /* emit entries in the same order the BAIJ value array stores them (column j, then row m within a block) */
            ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
450 
451 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
452 {
453   const PetscInt *ai, *aj,*ajj;
454   PetscInt        bs;
455   PetscInt64      nz,rnz,i,j,k,m;
456   PetscErrorCode  ierr;
457   PetscMUMPSInt   *row,*col;
458   PetscScalar     *val;
459   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
460   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
461 #if defined(PETSC_USE_COMPLEX)
462   PetscBool       hermitian;
463 #endif
464 
465   PetscFunctionBegin;
466 #if defined(PETSC_USE_COMPLEX)
467   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
468   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
469 #endif
470   ai   = aa->i;
471   aj   = aa->j;
472   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
473   if (reuse == MAT_INITIAL_MATRIX) {
474     nz   = aa->nz;
475     ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
476     if (bs>1) {
477       ierr       = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
478       mumps->val = mumps->val_alloc;
479     } else {
480       mumps->val = aa->a;
481     }
482     mumps->irn = row;
483     mumps->jcn = col;
484   } else {
485     if (bs == 1) mumps->val = aa->a;
486     row = mumps->irn;
487     col = mumps->jcn;
488   }
489   val = mumps->val;
490 
491   nz = 0;
492   if (bs>1) {
493     for (i=0; i<mbs; i++) {
494       rnz = ai[i+1] - ai[i];
495       ajj = aj + ai[i];
496       for (j=0; j<rnz; j++) {
497         for (k=0; k<bs; k++) {
498           for (m=0; m<bs; m++) {
499             if (ajj[j]>i || k>=m) {
500               if (reuse == MAT_INITIAL_MATRIX) {
501                 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
502                 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
503               }
504               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
505             }
506           }
507         }
508       }
509     }
510   } else if (reuse == MAT_INITIAL_MATRIX) {
511     for (i=0; i<mbs; i++) {
512       rnz = ai[i+1] - ai[i];
513       ajj = aj + ai[i];
514       for (j=0; j<rnz; j++) {
515         ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
516         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
517         nz++;
518       }
519     }
520     if (PetscUnlikely(nz != aa->nz)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %" PetscInt64_FMT " != %" PetscInt_FMT,nz,aa->nz);
521   }
522   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
523   PetscFunctionReturn(0);
524 }
525 
526 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
527 {
528   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
529   PetscInt64        nz,rnz,i,j;
530   const PetscScalar *av,*v1;
531   PetscScalar       *val;
532   PetscErrorCode    ierr;
533   PetscMUMPSInt     *row,*col;
534   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
535   PetscBool         missing;
536 #if defined(PETSC_USE_COMPLEX)
537   PetscBool         hermitian;
538 #endif
539 
540   PetscFunctionBegin;
541 #if defined(PETSC_USE_COMPLEX)
542   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
543   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
544 #endif
545   ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
546   ai    = aa->i; aj = aa->j;
547   adiag = aa->diag;
548   ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
549   if (reuse == MAT_INITIAL_MATRIX) {
550     /* count nz in the upper triangular part of A */
551     nz = 0;
552     if (missing) {
553       for (i=0; i<M; i++) {
554         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
555           for (j=ai[i];j<ai[i+1];j++) {
556             if (aj[j] < i) continue;
557             nz++;
558           }
559         } else {
560           nz += ai[i+1] - adiag[i];
561         }
562       }
563     } else {
564       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
565     }
566     ierr       = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
567     ierr       = PetscMalloc1(nz,&val);CHKERRQ(ierr);
568     mumps->nnz = nz;
569     mumps->irn = row;
570     mumps->jcn = col;
571     mumps->val = mumps->val_alloc = val;
572 
573     nz = 0;
574     if (missing) {
575       for (i=0; i<M; i++) {
576         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
577           for (j=ai[i];j<ai[i+1];j++) {
578             if (aj[j] < i) continue;
579             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
580             ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
581             val[nz] = av[j];
582             nz++;
583           }
584         } else {
585           rnz = ai[i+1] - adiag[i];
586           ajj = aj + adiag[i];
587           v1  = av + adiag[i];
588           for (j=0; j<rnz; j++) {
589             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
590             ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
591             val[nz++] = v1[j];
592           }
593         }
594       }
595     } else {
596       for (i=0; i<M; i++) {
597         rnz = ai[i+1] - adiag[i];
598         ajj = aj + adiag[i];
599         v1  = av + adiag[i];
600         for (j=0; j<rnz; j++) {
601           ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
602           ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
603           val[nz++] = v1[j];
604         }
605       }
606     }
607   } else {
608     nz = 0;
609     val = mumps->val;
610     if (missing) {
611       for (i=0; i <M; i++) {
612         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
613           for (j=ai[i];j<ai[i+1];j++) {
614             if (aj[j] < i) continue;
615             val[nz++] = av[j];
616           }
617         } else {
618           rnz = ai[i+1] - adiag[i];
619           v1  = av + adiag[i];
620           for (j=0; j<rnz; j++) {
621             val[nz++] = v1[j];
622           }
623         }
624       }
625     } else {
626       for (i=0; i <M; i++) {
627         rnz = ai[i+1] - adiag[i];
628         v1  = av + adiag[i];
629         for (j=0; j<rnz; j++) {
630           val[nz++] = v1[j];
631         }
632       }
633     }
634   }
635   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
638 
639 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
640 {
641   PetscErrorCode    ierr;
642   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
643   PetscInt          bs;
644   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
645   PetscMUMPSInt     *row,*col;
646   const PetscScalar *av,*bv,*v1,*v2;
647   PetscScalar       *val;
648   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
649   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
650   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
651   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
652 #if defined(PETSC_USE_COMPLEX)
653   PetscBool         hermitian;
654 #endif
655 
656   PetscFunctionBegin;
657 #if defined(PETSC_USE_COMPLEX)
658   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
659   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
660 #endif
661   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
662   rstart = A->rmap->rstart;
663   ai = aa->i;
664   aj = aa->j;
665   bi = bb->i;
666   bj = bb->j;
667   av = aa->a;
668   bv = bb->a;
669 
670   garray = mat->garray;
671 
672   if (reuse == MAT_INITIAL_MATRIX) {
673     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
674     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
675     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
676     /* can not decide the exact mumps->nnz now because of the SBAIJ */
677     mumps->irn = row;
678     mumps->jcn = col;
679     mumps->val = mumps->val_alloc = val;
680   } else {
681     val = mumps->val;
682   }
683 
684   jj = 0; irow = rstart;
685   for (i=0; i<mbs; i++) {
686     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
687     countA = ai[i+1] - ai[i];
688     countB = bi[i+1] - bi[i];
689     bjj    = bj + bi[i];
690     v1     = av + ai[i]*bs2;
691     v2     = bv + bi[i]*bs2;
692 
693     if (bs>1) {
694       /* A-part */
695       for (j=0; j<countA; j++) {
696         for (k=0; k<bs; k++) {
697           for (m=0; m<bs; m++) {
698             if (rstart + ajj[j]*bs>irow || k>=m) {
699               if (reuse == MAT_INITIAL_MATRIX) {
700                 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
701                 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
702               }
703               val[jj++] = v1[j*bs2 + m + k*bs];
704             }
705           }
706         }
707       }
708 
709       /* B-part */
710       for (j=0; j < countB; j++) {
711         for (k=0; k<bs; k++) {
712           for (m=0; m<bs; m++) {
713             if (reuse == MAT_INITIAL_MATRIX) {
714               ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
715               ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
716             }
717             val[jj++] = v2[j*bs2 + m + k*bs];
718           }
719         }
720       }
721     } else {
722       /* A-part */
723       for (j=0; j<countA; j++) {
724         if (reuse == MAT_INITIAL_MATRIX) {
725           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
726           ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
727         }
728         val[jj++] = v1[j];
729       }
730 
731       /* B-part */
732       for (j=0; j < countB; j++) {
733         if (reuse == MAT_INITIAL_MATRIX) {
734           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
735           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
736         }
737         val[jj++] = v2[j];
738       }
739     }
740     irow+=bs;
741   }
742   mumps->nnz = jj;
743   PetscFunctionReturn(0);
744 }
745 
746 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
747 {
748   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
749   PetscErrorCode    ierr;
750   PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
751   PetscMUMPSInt     *row,*col;
752   const PetscScalar *av, *bv,*v1,*v2;
753   PetscScalar       *val;
754   Mat               Ad,Ao;
755   Mat_SeqAIJ        *aa;
756   Mat_SeqAIJ        *bb;
757 
758   PetscFunctionBegin;
759   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
760   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
761   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
762 
763   aa = (Mat_SeqAIJ*)(Ad)->data;
764   bb = (Mat_SeqAIJ*)(Ao)->data;
765   ai = aa->i;
766   aj = aa->j;
767   bi = bb->i;
768   bj = bb->j;
769 
770   rstart = A->rmap->rstart;
771 
772   if (reuse == MAT_INITIAL_MATRIX) {
773     nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
774     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
775     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
776     mumps->nnz = nz;
777     mumps->irn = row;
778     mumps->jcn = col;
779     mumps->val = mumps->val_alloc = val;
780   } else {
781     val = mumps->val;
782   }
783 
784   jj = 0; irow = rstart;
785   for (i=0; i<m; i++) {
786     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
787     countA = ai[i+1] - ai[i];
788     countB = bi[i+1] - bi[i];
789     bjj    = bj + bi[i];
790     v1     = av + ai[i];
791     v2     = bv + bi[i];
792 
793     /* A-part */
794     for (j=0; j<countA; j++) {
795       if (reuse == MAT_INITIAL_MATRIX) {
796         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
797         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
798       }
799       val[jj++] = v1[j];
800     }
801 
802     /* B-part */
803     for (j=0; j < countB; j++) {
804       if (reuse == MAT_INITIAL_MATRIX) {
805         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
806         ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
807       }
808       val[jj++] = v2[j];
809     }
810     irow++;
811   }
812   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
813   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
818 {
819   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
820   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
821   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
822   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
823   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
824   const PetscInt    bs2=mat->bs2;
825   PetscErrorCode    ierr;
826   PetscInt          bs;
827   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
828   PetscMUMPSInt     *row,*col;
829   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
830   PetscScalar       *val;
831 
832   PetscFunctionBegin;
833   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
834   if (reuse == MAT_INITIAL_MATRIX) {
835     nz   = bs2*(aa->nz + bb->nz);
836     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
837     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
838     mumps->nnz = nz;
839     mumps->irn = row;
840     mumps->jcn = col;
841     mumps->val = mumps->val_alloc = val;
842   } else {
843     val = mumps->val;
844   }
845 
846   jj = 0; irow = rstart;
847   for (i=0; i<mbs; i++) {
848     countA = ai[i+1] - ai[i];
849     countB = bi[i+1] - bi[i];
850     ajj    = aj + ai[i];
851     bjj    = bj + bi[i];
852     v1     = av + bs2*ai[i];
853     v2     = bv + bs2*bi[i];
854 
855     idx = 0;
856     /* A-part */
857     for (k=0; k<countA; k++) {
858       for (j=0; j<bs; j++) {
859         for (n=0; n<bs; n++) {
860           if (reuse == MAT_INITIAL_MATRIX) {
861             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
862             ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
863           }
864           val[jj++] = v1[idx++];
865         }
866       }
867     }
868 
869     idx = 0;
870     /* B-part */
871     for (k=0; k<countB; k++) {
872       for (j=0; j<bs; j++) {
873         for (n=0; n<bs; n++) {
874           if (reuse == MAT_INITIAL_MATRIX) {
875             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
876             ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
877           }
878           val[jj++] = v2[idx++];
879         }
880       }
881     }
882     irow += bs;
883   }
884   PetscFunctionReturn(0);
885 }
886 
887 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
888 {
889   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
890   PetscErrorCode    ierr;
891   PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
892   PetscMUMPSInt     *row,*col;
893   const PetscScalar *av, *bv,*v1,*v2;
894   PetscScalar       *val;
895   Mat               Ad,Ao;
896   Mat_SeqAIJ        *aa;
897   Mat_SeqAIJ        *bb;
898 #if defined(PETSC_USE_COMPLEX)
899   PetscBool         hermitian;
900 #endif
901 
902   PetscFunctionBegin;
903 #if defined(PETSC_USE_COMPLEX)
904   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
905   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
906 #endif
907   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
908   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
909   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
910 
911   aa    = (Mat_SeqAIJ*)(Ad)->data;
912   bb    = (Mat_SeqAIJ*)(Ao)->data;
913   ai    = aa->i;
914   aj    = aa->j;
915   adiag = aa->diag;
916   bi    = bb->i;
917   bj    = bb->j;
918 
919   rstart = A->rmap->rstart;
920 
921   if (reuse == MAT_INITIAL_MATRIX) {
922     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
923     nzb = 0;    /* num of upper triangular entries in mat->B */
924     for (i=0; i<m; i++) {
925       nza   += (ai[i+1] - adiag[i]);
926       countB = bi[i+1] - bi[i];
927       bjj    = bj + bi[i];
928       for (j=0; j<countB; j++) {
929         if (garray[bjj[j]] > rstart) nzb++;
930       }
931     }
932 
933     nz   = nza + nzb; /* total nz of upper triangular part of mat */
934     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
935     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
936     mumps->nnz = nz;
937     mumps->irn = row;
938     mumps->jcn = col;
939     mumps->val = mumps->val_alloc = val;
940   } else {
941     val = mumps->val;
942   }
943 
944   jj = 0; irow = rstart;
945   for (i=0; i<m; i++) {
946     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
947     v1     = av + adiag[i];
948     countA = ai[i+1] - adiag[i];
949     countB = bi[i+1] - bi[i];
950     bjj    = bj + bi[i];
951     v2     = bv + bi[i];
952 
953     /* A-part */
954     for (j=0; j<countA; j++) {
955       if (reuse == MAT_INITIAL_MATRIX) {
956         ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
957         ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
958       }
959       val[jj++] = v1[j];
960     }
961 
962     /* B-part */
963     for (j=0; j < countB; j++) {
964       if (garray[bjj[j]] > rstart) {
965         if (reuse == MAT_INITIAL_MATRIX) {
966           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
967           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
968         }
969         val[jj++] = v2[j];
970       }
971     }
972     irow++;
973   }
974   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
975   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
/* Destroy a MUMPS factor matrix: free all PETSc-side buffers and scatters, terminate the
   MUMPS instance with JOB_END, release the duplicated communicator, and clear the composed
   query/set functions. The order matters: JOB_END is issued before the communicator is
   restored, since MUMPS still uses it during finalization. */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  mumps->id.job = JOB_END;   /* ask MUMPS to release its internal data for this instance */
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
    ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr);
  }
#endif
  ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
  ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
  ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr);
  /* give back the communicator obtained for MUMPS, after JOB_END so MUMPS no longer needs it */
  if (mumps->mumps_comm != MPI_COMM_NULL) {ierr = PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);}
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1029 
/* Set up the distributed RHS info for MUMPS (id.rhs_loc/irhs_loc/nloc_rhs/lrhs_loc).
   <nrhs> is the number of RHS. <array> points to the start of the RHS on the local processor.
   With no OpenMP sub-communicator (ompsize == 1) the caller's array is used directly;
   otherwise the RHS pieces of all ranks in the omp communicator are gathered onto the
   omp master, which then presents them to MUMPS. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  PetscErrorCode     ierr;
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt  ompsize=mumps->omp_comm_size;
  PetscInt           i,m,M,rstart;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  /* MUMPS stores indices in MUMPS_INT; a larger global size cannot be represented */
  if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      /* lazily build the (1-based) global row indices owned by this rank */
      mumps->nloc_rhs = m;
      ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
  #if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt  *ranges;
    PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group       petsc_group,omp_group;
    PetscScalar     *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
        ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
        /* translate omp-communicator ranks to petsc-communicator ranks so we can read their ownership ranges */
        ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRMPI(ierr);
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRMPI(ierr);

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
        ierr = MPI_Group_free(&petsc_group);CHKERRMPI(ierr);
        ierr = MPI_Group_free(&omp_group);CHKERRMPI(ierr);
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
        ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
    ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRMPI(ierr);

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          /* the gathered data is per-rank column-blocks; interleave them so each rhs is contiguous */
          for (i=0; i<nrhs; i++) {
            ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
  #endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}
1128 
/* Solve A x = b (or A^T x = b when ICNTL(9) != 1, see MatSolveTranspose_MUMPS) with the
   MUMPS factor stored in A. Handles centralized vs distributed RHS, the optional Schur
   complement condensation/expansion, and the scatter of MUMPS' distributed solution back
   into the PETSc vector x. */
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscScalar  *rarray = NULL;
  PetscScalar        *array;
  IS                 is_iden,is_petsc;
  PetscErrorCode     ierr;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  /* a failed factorization cannot produce a solution; flag x with Inf instead of erroring */
  if (A->factorerrortype) {
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential rhs vector*/
      ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      if (!mumps->myid) {
        ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)array;
      }
    }
  } else {  /* petsc_size == 1 */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    /* MUMPS overwrites the rhs with the solution, so solve in place on a copy of b held in x */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  /* release the rhs arrays taken before the solve */
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
    } else if (!mumps->myid) {
      ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);
    }
  } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1227 
1228 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1229 {
1230   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   mumps->id.ICNTL(9) = 0;
1235   ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
1236   mumps->id.ICNTL(9) = 1;
1237   PetscFunctionReturn(0);
1238 }
1239 
/* Solve A X = B for multiple right-hand sides with the MUMPS factor in A.
   X must be MATDENSE. B is either MATDENSE, or a MATTRANSPOSEMAT wrapping an AIJ matrix
   (MUMPS wants a sparse RHS in compressed COLUMN storage; see MatMatTransposeSolve_MUMPS).
   Sequential and parallel paths differ: in parallel the RHS may have to be centralized on
   the host and the distributed solution is scattered back into X. */
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
  Vec               b_seq;
  VecScatter        scat_rhs;
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is the transpose of the actual RHS matrix,
                 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    /* sequential: MUMPS solves in place, so X's array serves as both rhs and solution */
    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
        very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
        0, re-arrange B into desired order, which is a local operation.
      */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
      ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
      ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
      ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

      /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;
        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
        ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
        k = 0;
        for (proc=0; proc<mumps->petsc_size; proc++) {
          for (j=0; j<nrhs; j++) {
            for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
          }
        }

        ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
      } else {
        /* non-host ranks contribute nothing to the centralized rhs */
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
      }
      ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = ISDestroy(&is_to);CHKERRQ(ierr);
      ierr = ISDestroy(&is_from);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      if (!mumps->myid) { /* define rhs on the host */
        ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
        mumps->id.rhs = (MumpsScalar*)bray;
        ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    /* only the host passes the sparse rhs (the diagonal block of the transposed B) to MUMPS */
    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* locate the owner of this solution row to translate into v_mpi's per-proc column-major layout */
    for (proc=0; proc<mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
    }
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1487 
1488 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1489 {
1490   PetscErrorCode ierr;
1491   PetscBool      flg;
1492   Mat            B;
1493 
1494   PetscFunctionBegin;
1495   ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
1496   if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1497 
1498   /* Create B=Bt^T that uses Bt's data structure */
1499   ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
1500 
1501   ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
1502   ierr = MatDestroy(&B);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 #if !defined(PETSC_USE_COMPLEX)
1507 /*
1508   input:
1509    F:        numeric factor
1510   output:
1511    nneg:     total number of negative pivots
1512    nzero:    total number of zero pivots
1513    npos:     (global dimension of F) - nneg - nzero
1514 */
1515 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1516 {
1517   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1518   PetscErrorCode ierr;
1519   PetscMPIInt    size;
1520 
1521   PetscFunctionBegin;
1522   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRMPI(ierr);
1523   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1524   if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia",mumps->id.INFOG(13));
1525 
1526   if (nneg) *nneg = mumps->id.INFOG(12);
1527   if (nzero || npos) {
1528     if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1529     if (nzero) *nzero = mumps->id.INFOG(28);
1530     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1531   }
1532   PetscFunctionReturn(0);
1533 }
1534 #endif
1535 
/* Gather the COO triples (irn/jcn/val) from all ranks of the omp communicator onto
   the omp master rank, which is the only rank that calls MUMPS when PETSc's OpenMP
   support is in use. A no-op when omp_comm_size == 1.

   Input:
     reuse - MAT_INITIAL_MATRIX: gather counts, indices and values; on the master,
             mumps->irn/jcn/val are reallocated to hold the union of all triples.
             MAT_REUSE_MATRIX: indices are already in place from a previous call;
             only the numerical values are transferred.
     mumps - the MUMPS interface context

   Messages are manually sliced into chunks of at most PETSC_MPI_INT_MAX entries
   because MPI counts are (signed) int while nnz may exceed 2^31-1. */
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
      ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRMPI(ierr);

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
        ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);

        /* Self communication */
        ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
        ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (i=1; i<osize; i++) {
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            ierr    = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            irn    += count;
            jcn    += count;
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else { /* non-master rank: send its own triples to the master, sliced the same way */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr    = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          ierr    = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          ierr    = MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          irn    += count;
          jcn    += count;
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else { /* MAT_REUSE_MATRIX: indices are unchanged, transfer only the numerical values */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr    = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRMPI(ierr);
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRMPI(ierr);
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(0);
}
1654 
1655 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1656 {
1657   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1658   PetscErrorCode ierr;
1659   PetscBool      isMPIAIJ;
1660 
1661   PetscFunctionBegin;
1662   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1663     if (mumps->id.INFOG(1) == -6) {
1664       ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1665     }
1666     ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1667     PetscFunctionReturn(0);
1668   }
1669 
1670   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
1671   ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);
1672 
1673   /* numerical factorization phase */
1674   /*-------------------------------*/
1675   mumps->id.job = JOB_FACTNUMERIC;
1676   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1677     if (!mumps->myid) {
1678       mumps->id.a = (MumpsScalar*)mumps->val;
1679     }
1680   } else {
1681     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1682   }
1683   PetscMUMPS_c(mumps);
1684   if (mumps->id.INFOG(1) < 0) {
1685     if (A->erroriffailure) {
1686       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));
1687     } else {
1688       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1689         ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1690         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1691       } else if (mumps->id.INFOG(1) == -13) {
1692         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1693         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1694       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
1695         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1696         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1697       } else {
1698         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1699         F->factorerrortype = MAT_FACTOR_OTHER;
1700       }
1701     }
1702   }
1703   if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d",mumps->id.INFOG(16));
1704 
1705   F->assembled    = PETSC_TRUE;
1706   mumps->matstruc = SAME_NONZERO_PATTERN;
1707   if (F->schur) { /* reset Schur status to unfactored */
1708 #if defined(PETSC_HAVE_CUDA)
1709     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1710 #endif
1711     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1712       mumps->id.ICNTL(19) = 2;
1713       ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
1714     }
1715     ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
1716   }
1717 
1718   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1719   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1720 
1721   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1722   if (mumps->petsc_size > 1) {
1723     PetscInt    lsol_loc;
1724     PetscScalar *sol_loc;
1725 
1726     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);
1727 
1728     /* distributed solution; Create x_seq=sol_loc for repeated use */
1729     if (mumps->x_seq) {
1730       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1731       ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
1732       ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
1733     }
1734     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1735     ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
1736     mumps->id.lsol_loc = lsol_loc;
1737     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1738     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
1739   }
1740   ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
1741   PetscFunctionReturn(0);
1742 }
1743 
1744 /* Sets MUMPS options from the options database */
1745 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1746 {
1747   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1748   PetscErrorCode ierr;
1749   PetscMUMPSInt  icntl=0;
1750   PetscInt       info[80],i,ninfo=80;
1751   PetscBool      flg=PETSC_FALSE;
1752 
1753   PetscFunctionBegin;
1754   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
1755   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
1756   if (flg) mumps->id.ICNTL(1) = icntl;
1757   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
1758   if (flg) mumps->id.ICNTL(2) = icntl;
1759   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
1760   if (flg) mumps->id.ICNTL(3) = icntl;
1761 
1762   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
1763   if (flg) mumps->id.ICNTL(4) = icntl;
1764   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
1765 
1766   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
1767   if (flg) mumps->id.ICNTL(6) = icntl;
1768 
1769   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
1770   if (flg) {
1771     if (icntl == 1 || icntl < 0 || icntl > 7) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
1772     mumps->id.ICNTL(7) = icntl;
1773   }
1774 
1775   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
1776   /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */
1777   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
1778   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
1779   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
1780   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
1781   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
1782   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
1783   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1784     ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
1785     ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
1786   }
1787 
1788   /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
1789      and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
1790      and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
1791      This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
1792      see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
1793      In short, we could not use distributed RHS with MPICH until v4.0b1.
1794    */
1795 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0) || (defined(PETSC_HAVE_MPICH_NUMVERSION) && (PETSC_HAVE_MPICH_NUMVERSION < 40000101))
1796   mumps->ICNTL20 = 0;  /* Centralized dense RHS*/
1797 #else
1798   mumps->ICNTL20 = 10; /* Distributed dense RHS*/
1799 #endif
1800   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides","None",mumps->ICNTL20,&mumps->ICNTL20,&flg);CHKERRQ(ierr);
1801   if (flg && mumps->ICNTL20 != 10 && mumps->ICNTL20 != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10",(int)mumps->ICNTL20);
1802 #if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
1803   if (flg && mumps->ICNTL20 == 10) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"ICNTL(20)=10 is not supported before MUMPS-5.3.0");
1804 #endif
1805   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */
1806 
1807   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
1808   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
1809   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
1810   if (mumps->id.ICNTL(24)) {
1811     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1812   }
1813 
1814   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
1815   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
1816   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
1817   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
1818   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
1819   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
1820   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
1821   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
1822   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
1823   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
1824   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
1825   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
1826 
1827   ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
1828   ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
1829   ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
1830   ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
1831   ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
1832   ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);
1833 
1834   ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr);
1835 
1836   ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
1837   if (ninfo) {
1838     if (PetscUnlikely(ninfo > 80)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %" PetscInt_FMT " must <= 80",ninfo);
1839     ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
1840     mumps->ninfo = ninfo;
1841     for (i=0; i<ninfo; i++) {
1842       if (PetscUnlikely(info[i] < 0 || info[i]>80)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %" PetscInt_FMT " must between 1 and 80",ninfo);
1843       else  mumps->info[i] = info[i];
1844     }
1845   }
1846 
1847   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1848   PetscFunctionReturn(0);
1849 }
1850 
/* One-time setup of the Mat_MUMPS context for matrix A: sets up the communicators
   (including the optional OpenMP support where only "omp master" ranks call MUMPS),
   initializes the MUMPS instance (JOB_INIT), broadcasts the MUMPS defaults to
   non-calling ranks, and applies PETSc's default overrides of the MUMPS controls. */
PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRMPI(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRMPI(ierr);/* "if (!myid)" still works even if mumps_comm is different */

  ierr = PetscOptionsHasName(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
  ierr = PetscOptionsGetInt(NULL,((PetscObject)A)->prefix,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual",((PetscObject)A)->prefix?((PetscObject)A)->prefix:"");
#endif
  } else {
    /* no OpenMP support: every rank is its own "omp master" and MUMPS runs on A's comm */
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRMPI(ierr);
  mumps->reqs = NULL;
  mumps->tag  = 0;

  /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */
  /* NOTE(review): this replaces mumps_comm with a comm derived from A's communicator even on the
     OpenMP path above, where mumps_comm was obtained from PetscOmpCtrlGetOmpComms() -- confirm
     this is intended for the use_petsc_omp_support case */
  if (mumps->mumps_comm != MPI_COMM_NULL) {
    ierr =  PetscCommGetComm(PetscObjectComm((PetscObject)A),&mumps->mumps_comm);CHKERRQ(ierr);
  }

  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1;  /* host participates factorizaton and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d",mumps->id.INFOG(1));

  /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
   */
  ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm);CHKERRMPI(ierr);
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRMPI(ierr);

  mumps->scat_rhs = NULL;
  mumps->scat_sol = NULL;

  /* set PETSc-MUMPS default options - override MUMPS default */
  mumps->id.ICNTL(3) = 0; /* silence MUMPS output by default */
  mumps->id.ICNTL(4) = 0;
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
    mumps->id.ICNTL(7)  = 7;   /* automatic choice of ordering done by the package */
  } else {
    mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
    mumps->id.ICNTL(21) = 1;   /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur    = 0;
  mumps->id.listvar_schur = NULL;
  mumps->id.schur         = NULL;
  mumps->sizeredrhs       = 0;
  mumps->schur_sol        = NULL;
  mumps->schur_sizesol    = 0;
  PetscFunctionReturn(0);
}
1922 
1923 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1924 {
1925   PetscErrorCode ierr;
1926 
1927   PetscFunctionBegin;
1928   if (mumps->id.INFOG(1) < 0) {
1929     if (A->erroriffailure) {
1930       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d",mumps->id.INFOG(1));
1931     } else {
1932       if (mumps->id.INFOG(1) == -6) {
1933         ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1934         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1935       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1936         ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1937         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1938       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
1939         ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
1940       } else {
1941         ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1942         F->factorerrortype = MAT_FACTOR_OTHER;
1943       }
1944     }
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1950 {
1951   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1952   PetscErrorCode ierr;
1953   Vec            b;
1954   const PetscInt M = A->rmap->N;
1955 
1956   PetscFunctionBegin;
1957   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
1958 
1959   /* Set MUMPS options from the options database */
1960   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
1961 
1962   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
1963   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
1964 
1965   /* analysis phase */
1966   /*----------------*/
1967   mumps->id.job = JOB_FACTSYMBOLIC;
1968   mumps->id.n   = M;
1969   switch (mumps->id.ICNTL(18)) {
1970   case 0:  /* centralized assembled matrix input */
1971     if (!mumps->myid) {
1972       mumps->id.nnz = mumps->nnz;
1973       mumps->id.irn = mumps->irn;
1974       mumps->id.jcn = mumps->jcn;
1975       if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
1976       if (r) {
1977         mumps->id.ICNTL(7) = 1;
1978         if (!mumps->myid) {
1979           const PetscInt *idx;
1980           PetscInt       i;
1981 
1982           ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr);
1983           ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);
1984           for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */
1985           ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
1986         }
1987       }
1988     }
1989     break;
1990   case 3:  /* distributed assembled matrix input (size>1) */
1991     mumps->id.nnz_loc = mumps->nnz;
1992     mumps->id.irn_loc = mumps->irn;
1993     mumps->id.jcn_loc = mumps->jcn;
1994     if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
1995     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1996       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
1997       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
1998       ierr = VecDestroy(&b);CHKERRQ(ierr);
1999     }
2000     break;
2001   }
2002   PetscMUMPS_c(mumps);
2003   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2004 
2005   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2006   F->ops->solve           = MatSolve_MUMPS;
2007   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2008   F->ops->matsolve        = MatMatSolve_MUMPS;
2009   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
2010   PetscFunctionReturn(0);
2011 }
2012 
2013 /* Note the Petsc r and c permutations are ignored */
2014 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
2015 {
2016   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2017   PetscErrorCode ierr;
2018   Vec            b;
2019   const PetscInt M = A->rmap->N;
2020 
2021   PetscFunctionBegin;
2022   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2023 
2024   /* Set MUMPS options from the options database */
2025   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2026 
2027   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2028   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2029 
2030   /* analysis phase */
2031   /*----------------*/
2032   mumps->id.job = JOB_FACTSYMBOLIC;
2033   mumps->id.n   = M;
2034   switch (mumps->id.ICNTL(18)) {
2035   case 0:  /* centralized assembled matrix input */
2036     if (!mumps->myid) {
2037       mumps->id.nnz = mumps->nnz;
2038       mumps->id.irn = mumps->irn;
2039       mumps->id.jcn = mumps->jcn;
2040       if (mumps->id.ICNTL(6)>1) {
2041         mumps->id.a = (MumpsScalar*)mumps->val;
2042       }
2043     }
2044     break;
2045   case 3:  /* distributed assembled matrix input (size>1) */
2046     mumps->id.nnz_loc = mumps->nnz;
2047     mumps->id.irn_loc = mumps->irn;
2048     mumps->id.jcn_loc = mumps->jcn;
2049     if (mumps->id.ICNTL(6)>1) {
2050       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2051     }
2052     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2053       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2054       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2055       ierr = VecDestroy(&b);CHKERRQ(ierr);
2056     }
2057     break;
2058   }
2059   PetscMUMPS_c(mumps);
2060   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2061 
2062   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
2063   F->ops->solve           = MatSolve_MUMPS;
2064   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 /* Note the Petsc r permutation and factor info are ignored */
2069 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2070 {
2071   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2072   PetscErrorCode ierr;
2073   Vec            b;
2074   const PetscInt M = A->rmap->N;
2075 
2076   PetscFunctionBegin;
2077   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2078 
2079   /* Set MUMPS options from the options database */
2080   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2081 
2082   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2083   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2084 
2085   /* analysis phase */
2086   /*----------------*/
2087   mumps->id.job = JOB_FACTSYMBOLIC;
2088   mumps->id.n   = M;
2089   switch (mumps->id.ICNTL(18)) {
2090   case 0:  /* centralized assembled matrix input */
2091     if (!mumps->myid) {
2092       mumps->id.nnz = mumps->nnz;
2093       mumps->id.irn = mumps->irn;
2094       mumps->id.jcn = mumps->jcn;
2095       if (mumps->id.ICNTL(6)>1) {
2096         mumps->id.a = (MumpsScalar*)mumps->val;
2097       }
2098     }
2099     break;
2100   case 3:  /* distributed assembled matrix input (size>1) */
2101     mumps->id.nnz_loc = mumps->nnz;
2102     mumps->id.irn_loc = mumps->irn;
2103     mumps->id.jcn_loc = mumps->jcn;
2104     if (mumps->id.ICNTL(6)>1) {
2105       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2106     }
2107     if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2108       ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2109       ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2110       ierr = VecDestroy(&b);CHKERRQ(ierr);
2111     }
2112     break;
2113   }
2114   PetscMUMPS_c(mumps);
2115   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2116 
2117   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2118   F->ops->solve                 = MatSolve_MUMPS;
2119   F->ops->solvetranspose        = MatSolve_MUMPS;
2120   F->ops->matsolve              = MatMatSolve_MUMPS;
2121   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2122 #if defined(PETSC_USE_COMPLEX)
2123   F->ops->getinertia = NULL;
2124 #else
2125   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2126 #endif
2127   PetscFunctionReturn(0);
2128 }
2129 
/* ASCII viewer for a MUMPS factored matrix.
   With the PETSC_VIEWER_ASCII_INFO format, prints the MUMPS control
   parameters (SYM/PAR/ICNTL/CNTL) plus the per-rank statistics (RINFO/INFO,
   printed synchronized) and the host-only global statistics (RINFOG/INFOG).
   Silently returns for non-MUMPS matrices and non-ASCII viewers. */
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscBool         iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  /* check if matrix is mumps type */
  if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);

  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      /* control parameters, identical on all ranks */
      ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d\n",mumps->id.sym);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d\n",mumps->id.par);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d\n",mumps->id.ICNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d\n",mumps->id.ICNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d\n",mumps->id.ICNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d\n",mumps->id.ICNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d\n",mumps->id.ICNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d\n",mumps->id.ICNTL(6));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d\n",mumps->id.ICNTL(7));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d\n",mumps->id.ICNTL(8));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d\n",mumps->id.ICNTL(10));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d\n",mumps->id.ICNTL(11));CHKERRQ(ierr);
      /* error-analysis statistics are only meaningful when ICNTL(11) enabled them */
      if (mumps->id.ICNTL(11)>0) {
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
      }
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d\n",mumps->id.ICNTL(12));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d\n",mumps->id.ICNTL(13));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d\n",mumps->id.ICNTL(14));CHKERRQ(ierr);
      /* ICNTL(15-17) not used */
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d\n",mumps->id.ICNTL(18));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d\n",mumps->id.ICNTL(19));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d\n",mumps->id.ICNTL(20));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d\n",mumps->id.ICNTL(21));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d\n",mumps->id.ICNTL(22));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d\n",mumps->id.ICNTL(23));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d\n",mumps->id.ICNTL(24));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d\n",mumps->id.ICNTL(25));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d\n",mumps->id.ICNTL(26));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d\n",mumps->id.ICNTL(27));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d\n",mumps->id.ICNTL(28));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d\n",mumps->id.ICNTL(29));CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d\n",mumps->id.ICNTL(30));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors is discarded in the solve phase):    %d\n",mumps->id.ICNTL(31));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d\n",mumps->id.ICNTL(33));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d\n",mumps->id.ICNTL(35));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d\n",mumps->id.ICNTL(36));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d\n",mumps->id.ICNTL(38));CHKERRQ(ierr);

      /* real-valued control parameters */
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);

      /* information local to each processor */
      /* synchronized printing: each rank's line is flushed through the viewer
         in rank order so per-process statistics appear grouped */
      ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d\n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      ierr = PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);

      /* user-requested extra INFO entries (-mat_mumps_view_info); indices are
         bounded by 80, presumably the size of the MUMPS INFO array — see the
         MUMPS Users' Guide */
      if (mumps->ninfo && mumps->ninfo <= 80) {
        PetscInt i;
        for (i=0; i<mumps->ninfo; i++) {
          ierr = PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "): \n",mumps->info[i]);CHKERRQ(ierr);
          ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d\n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
          ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
        }
      }
      ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);

      if (!mumps->myid) { /* information from the host */
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);

        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(3));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n",mumps->id.INFOG(4));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d\n",mumps->id.INFOG(5));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d\n",mumps->id.INFOG(6));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d\n",mumps->id.INFOG(7));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n",mumps->id.INFOG(8));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n",mumps->id.INFOG(9));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d\n",mumps->id.INFOG(10));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d\n",mumps->id.INFOG(11));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d\n",mumps->id.INFOG(12));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d\n",mumps->id.INFOG(13));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d\n",mumps->id.INFOG(14));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d\n",mumps->id.INFOG(15));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n",mumps->id.INFOG(16));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n",mumps->id.INFOG(17));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n",mumps->id.INFOG(18));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n",mumps->id.INFOG(19));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d\n",mumps->id.INFOG(20));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n",mumps->id.INFOG(21));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n",mumps->id.INFOG(22));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n",mumps->id.INFOG(23));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n",mumps->id.INFOG(24));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n",mumps->id.INFOG(25));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(36));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n",mumps->id.INFOG(37));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n",mumps->id.INFOG(38));CHKERRQ(ierr);
        ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n",mumps->id.INFOG(39));CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
2277 
2278 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2279 {
2280   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2281 
2282   PetscFunctionBegin;
2283   info->block_size        = 1.0;
2284   info->nz_allocated      = mumps->id.INFOG(20);
2285   info->nz_used           = mumps->id.INFOG(20);
2286   info->nz_unneeded       = 0.0;
2287   info->assemblies        = 0.0;
2288   info->mallocs           = 0.0;
2289   info->memory            = 0.0;
2290   info->fill_ratio_given  = 0;
2291   info->fill_ratio_needed = 0;
2292   info->factor_mallocs    = 0;
2293   PetscFunctionReturn(0);
2294 }
2295 
2296 /* -------------------------------------------------------------------------------------------*/
/* Register the index set 'is' as the Schur complement block of the factor F.

   Only a host-centralized Schur complement is supported: in parallel, every
   rank other than 0 must pass an empty IS (enforced by the Allreduce below).
   Side effects: (re)creates the sequential dense matrix F->schur that MUMPS
   will fill, stores 1-based (Fortran) indices in id.listvar_schur, and
   selects the MUMPS Schur return format via ICNTL(19). */
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRMPI(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc");
  }

  /* Schur complement matrix */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  /* point MUMPS directly at the dense storage of F->schur so it writes the
     Schur complement in place (the const qualifier is cast away here) */
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size; /* leading dimension: the block is stored contiguously */
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    /* sym==1 is the MUMPS positive-definite mode, so mark the Schur block SPD */
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}
2346 
2347 /* -------------------------------------------------------------------------------------------*/
/* Copy the MUMPS-computed Schur complement into a newly created sequential
   dense matrix *S (caller owns and must destroy it).

   MUMPS returns the Schur block in a layout that depends on ICNTL(19) and on
   the symmetry of the factorization; each branch below converts its layout to
   the full column-major storage PETSc's MATDENSE expects. Errors out if
   Schur mode was never enabled with MatFactorSetSchurIS(). */
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit: rebuild PetscScalar from MUMPS (r,i) component pairs */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      /* row-major -> column-major: transpose while copying */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val;
        }
      }
    } else { /* stored by columns */
      /* already column-major: straight copy */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      /* mirror the lower triangle into both halves of the full matrix */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      /* mirror the lower triangle (row-major) into both halves */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val;
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St;
  PetscFunctionReturn(0);
}
2416 
2417 /* -------------------------------------------------------------------------------------------*/
2418 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2419 {
2420   PetscErrorCode ierr;
2421   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2422 
2423   PetscFunctionBegin;
2424   ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2429 {
2430   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2431 
2432   PetscFunctionBegin;
2433   *ival = mumps->id.ICNTL(icntl);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 /*@
2438   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2439 
2440    Logically Collective on Mat
2441 
2442    Input Parameters:
2443 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2444 .  icntl - index of MUMPS parameter array ICNTL()
2445 -  ival - value of MUMPS ICNTL(icntl)
2446 
2447   Options Database:
2448 .   -mat_mumps_icntl_<icntl> <ival>
2449 
2450    Level: beginner
2451 
2452    References:
2453 .     MUMPS Users' Guide
2454 
2455 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2456  @*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* only meaningful on a matrix obtained from MatGetFactor() */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  /* dispatch to the composed MatMumpsSetIcntl_C method; PetscTryMethod makes
     this a silent no-op for non-MUMPS factors */
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2469 
2470 /*@
2471   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2472 
2473    Logically Collective on Mat
2474 
2475    Input Parameters:
2476 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2477 -  icntl - index of MUMPS parameter array ICNTL()
2478 
2479   Output Parameter:
2480 .  ival - value of MUMPS ICNTL(icntl)
2481 
2482    Level: beginner
2483 
2484    References:
2485 .     MUMPS Users' Guide
2486 
2487 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2488 @*/
PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* only meaningful on a matrix obtained from MatGetFactor() */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidIntPointer(ival,3);
  /* dispatch to the composed MatMumpsGetIcntl_C method; unlike the setter,
     PetscUseMethod errors if the factor does not provide it */
  ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2501 
2502 /* -------------------------------------------------------------------------------------------*/
2503 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2504 {
2505   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2506 
2507   PetscFunctionBegin;
2508   mumps->id.CNTL(icntl) = val;
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2513 {
2514   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2515 
2516   PetscFunctionBegin;
2517   *val = mumps->id.CNTL(icntl);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 /*@
2522   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2523 
2524    Logically Collective on Mat
2525 
2526    Input Parameters:
2527 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2528 .  icntl - index of MUMPS parameter array CNTL()
2529 -  val - value of MUMPS CNTL(icntl)
2530 
2531   Options Database:
2532 .   -mat_mumps_cntl_<icntl> <val>
2533 
2534    Level: beginner
2535 
2536    References:
2537 .     MUMPS Users' Guide
2538 
2539 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2540 @*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* CNTL() values only exist on a factored matrix produced by the MUMPS interface */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  /* logically collective: both the index and the value must match on every rank */
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveReal(F,val,3);
  /* PetscTryMethod (not Use): silently a no-op when F is not a MUMPS factor */
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2553 
2554 /*@
2555   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2556 
2557    Logically Collective on Mat
2558 
2559    Input Parameters:
2560 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2561 -  icntl - index of MUMPS parameter array CNTL()
2562 
2563   Output Parameter:
2564 .  val - value of MUMPS CNTL(icntl)
2565 
2566    Level: beginner
2567 
2568    References:
2569 .      MUMPS Users' Guide
2570 
2571 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2572 @*/
PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* CNTL() values only exist on a factored matrix produced by the MUMPS interface */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  /* logically collective: icntl must be identical on every rank */
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidRealPointer(val,3);
  /* PetscUseMethod errors out if F was not created by the MUMPS solver package */
  ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2585 
/* Implementation of MatMumpsGetInfo(): read the per-MPI-process MUMPS INFO(icntl)
   entry of the factor F. The INFO() accessor macro is presumably defined earlier
   in this file to translate the 1-based Fortran-style index — TODO confirm. */
PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(0);
}
2594 
/* Implementation of MatMumpsGetInfog(): read the global (collected on the host)
   MUMPS INFOG(icntl) entry of the factor F. */
PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(0);
}
2603 
/* Implementation of MatMumpsGetRinfo(): read the per-MPI-process real statistic
   RINFO(icntl) of the factor F. */
PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfo = mumps->id.RINFO(icntl);
  PetscFunctionReturn(0);
}
2612 
/* Implementation of MatMumpsGetRinfog(): read the global real statistic
   RINFOG(icntl) of the factor F. */
PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *rinfog = mumps->id.RINFOG(icntl);
  PetscFunctionReturn(0);
}
2621 
/* Implementation of MatMumpsGetInverse(): use MUMPS ICNTL(30) to compute the
   user-selected entries of inv(A), where the requested entries are given by the
   sparsity pattern of spRHS (a MATTRANSPOSEMAT wrapper). On return the selected
   entries are stored in place of the values of the wrapped matrix. */
PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;
  Mat            Bt = NULL,Btseq = NULL;
  PetscBool      flg;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *aa;
  PetscInt       spnr,*ia,*ja,M,nrhs;

  PetscFunctionBegin;
  PetscValidPointer(spRHS,2);
  /* spRHS must be a transpose wrapper; MUMPS stores the sparse RHS by columns,
     so the wrapped matrix Bt holds the pattern in the orientation MUMPS expects */
  ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
  } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix");

  /* ICNTL(30)=1: compute user-specified entries of the inverse */
  ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);

  if (mumps->petsc_size > 1) {
    /* NOTE(review): unchecked cast — assumes Bt is MATMPIAIJ in parallel and that
       only the rank-0 diagonal block b->A carries the requested pattern; confirm
       against callers */
    Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
    Btseq = b->A;
  } else {
    Btseq = Bt;
  }

  ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M;
  mumps->id.rhs  = NULL;  /* dense RHS unused; the sparse-RHS fields below are used instead */

  if (!mumps->myid) {
    /* only the host (rank 0) supplies the sparse RHS to MUMPS; shift=1 because
       MUMPS expects 1-based (Fortran) CSR indices */
    ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
    /* convert PetscInt CSR arrays to PetscMUMPSInt (may copy when widths differ) */
    ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
    mumps->id.rhs_sparse  = (MumpsScalar*)aa;
  } else {
    mumps->id.irhs_ptr    = NULL;
    mumps->id.irhs_sparse = NULL;
    mumps->id.nz_rhs      = 0;
    mumps->id.rhs_sparse  = NULL;
  }
  mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
  mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0)
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d",mumps->id.INFOG(1),mumps->id.INFO(2));

  if (!mumps->myid) {
    /* release the array/IJ access acquired above; results were written into aa in place */
    ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
    ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
  }
  PetscFunctionReturn(0);
}
2681 
2682 /*@
2683   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2684 
2685    Logically Collective on Mat
2686 
2687    Input Parameters:
2688 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2689 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2690 
2691   Output Parameter:
2692 . spRHS - requested entries of inverse of A
2693 
2694    Level: beginner
2695 
2696    References:
2697 .      MUMPS Users' Guide
2698 
2699 .seealso: MatGetFactor(), MatCreateTranspose()
2700 @*/
PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* inverse entries can only be requested from a factored matrix */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  /* spRHS type checking (MATTRANSPOSEMAT) is done in the _MUMPS implementation */
  ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2711 
2712 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2713 {
2714   PetscErrorCode ierr;
2715   Mat            spRHS;
2716 
2717   PetscFunctionBegin;
2718   ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
2719   ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
2720   ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
2721   PetscFunctionReturn(0);
2722 }
2723 
2724 /*@
2725   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2726 
2727    Logically Collective on Mat
2728 
2729    Input Parameters:
2730 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2731 -  spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2732 
2733   Output Parameter:
2734 . spRHST - requested entries of inverse of A^T
2735 
2736    Level: beginner
2737 
2738    References:
2739 .      MUMPS Users' Guide
2740 
2741 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2742 @*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidType(F,1);
  /* inverse entries can only be requested from a factored matrix */
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  /* unlike MatMumpsGetInverse(), the RHS here is a plain (Seq/MPI) AIJ matrix
     that is wrapped as a transpose inside the _MUMPS implementation */
  ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");

  ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2757 
2758 /*@
2759   MatMumpsGetInfo - Get MUMPS parameter INFO()
2760 
2761    Logically Collective on Mat
2762 
2763    Input Parameters:
2764 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2765 -  icntl - index of MUMPS parameter array INFO()
2766 
2767   Output Parameter:
2768 .  ival - value of MUMPS INFO(icntl)
2769 
2770    Level: beginner
2771 
2772    References:
2773 .      MUMPS Users' Guide
2774 
2775 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2776 @*/
2777 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2778 {
2779   PetscErrorCode ierr;
2780 
2781   PetscFunctionBegin;
2782   PetscValidType(F,1);
2783   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2784   PetscValidIntPointer(ival,3);
2785   ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 /*@
2790   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2791 
2792    Logically Collective on Mat
2793 
2794    Input Parameters:
2795 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2796 -  icntl - index of MUMPS parameter array INFOG()
2797 
2798   Output Parameter:
2799 .  ival - value of MUMPS INFOG(icntl)
2800 
2801    Level: beginner
2802 
2803    References:
2804 .      MUMPS Users' Guide
2805 
2806 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2807 @*/
2808 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2809 {
2810   PetscErrorCode ierr;
2811 
2812   PetscFunctionBegin;
2813   PetscValidType(F,1);
2814   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2815   PetscValidIntPointer(ival,3);
2816   ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2817   PetscFunctionReturn(0);
2818 }
2819 
2820 /*@
2821   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2822 
2823    Logically Collective on Mat
2824 
2825    Input Parameters:
2826 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2827 -  icntl - index of MUMPS parameter array RINFO()
2828 
2829   Output Parameter:
2830 .  val - value of MUMPS RINFO(icntl)
2831 
2832    Level: beginner
2833 
2834    References:
2835 .       MUMPS Users' Guide
2836 
2837 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2838 @*/
2839 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2840 {
2841   PetscErrorCode ierr;
2842 
2843   PetscFunctionBegin;
2844   PetscValidType(F,1);
2845   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2846   PetscValidRealPointer(val,3);
2847   ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2848   PetscFunctionReturn(0);
2849 }
2850 
2851 /*@
2852   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2853 
2854    Logically Collective on Mat
2855 
2856    Input Parameters:
2857 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2858 -  icntl - index of MUMPS parameter array RINFOG()
2859 
2860   Output Parameter:
2861 .  val - value of MUMPS RINFOG(icntl)
2862 
2863    Level: beginner
2864 
2865    References:
2866 .      MUMPS Users' Guide
2867 
2868 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2869 @*/
2870 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2871 {
2872   PetscErrorCode ierr;
2873 
2874   PetscFunctionBegin;
2875   PetscValidType(F,1);
2876   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2877   PetscValidRealPointer(val,3);
2878   ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2879   PetscFunctionReturn(0);
2880 }
2881 
2882 /*MC
2883   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2884   distributed and sequential matrices via the external package MUMPS.
2885 
2886   Works with MATAIJ and MATSBAIJ matrices
2887 
2888   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2889 
2890   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2891 
2892   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2893 
2894   Options Database Keys:
2895 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2896 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2897 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2898 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2899 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2900 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto
2901                         Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only)
2902 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2903 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2904 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2905 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2906 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2907 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2908 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2909 .  -mat_mumps_icntl_20  - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS
2910 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can be allocated per processor
2912 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2913 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
.  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering
2916 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2917 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2918 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2919 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2920 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2921 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2922 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2923 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2924 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2925 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2926 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2927 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2928 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2929 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2930                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2931 
2932   Level: beginner
2933 
2934     Notes:
2935     MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
2936 
2937     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2938 $          KSPGetPC(ksp,&pc);
2939 $          PCFactorGetMatrix(pc,&mat);
2940 $          MatMumpsGetInfo(mat,....);
2941 $          MatMumpsGetInfog(mat,....); etc.
2942            Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2943 
2944   Using MUMPS with 64-bit integers
2945     MUMPS provides 64-bit integer support in two build modes:
2946       full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and
2947       requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI).
2948 
2949       selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices,
2950       MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and
2951       columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit
2952       integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS.
2953 
    With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc.
2955 
2956   Two modes to run MUMPS/PETSc with OpenMP
2957 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2958 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2959 
2960 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2961 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2962 
2963    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2964    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2965    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2966    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2968 
2969    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2970    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2971    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
   are called slave ranks (or slaves). Only master ranks are visible to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
2973    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2974    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
2975    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
2976    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
2977    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
2978    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2979    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2980    examine the mapping result.
2981 
2982    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2983    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2984    calls omp_set_num_threads(m) internally before calling MUMPS.
2985 
2986    References:
2987 +   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2988 -   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2989 
2990 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCFactorGetMatrix()
2991 
2992 M*/
2993 
/* Answer "which solver package produced this factor?" queries; composed on every
   MUMPS factor matrix under the name "MatFactorGetSolverType_C". */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}
3000 
3001 /* MatGetFactor for Seq and MPI AIJ matrices */
/* Build an (empty) MUMPS factor matrix B for a Seq/MPI AIJ matrix A; the actual
   symbolic/numeric factorization happens later through the ops installed here. */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  /* MUMPS cannot Cholesky-factor complex Hermitian (non-symmetric) matrices; fail early */
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* compose the MUMPS-specific query/option routines so the MatMumpsXxx() and
     MatFactorXxx() front ends can dispatch to them */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* pick the AIJ -> MUMPS triples converter matching seq/parallel layout */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
    mumps->sym = 0;  /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    /* Cholesky: convert the AIJ matrix to symmetric (SBAIJ-style) triples */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
    ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;  /* complex: always general symmetric (SYM=2) */
#else
    /* SYM=1 (symmetric positive definite) only when the user declared A SPD */
    if (A->spd_set && A->spd) mumps->sym = 1;
    else                      mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  /* start the MUMPS instance (JOB=-1) and set default controls */
  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3077 
3078 /* MatGetFactor for Seq and MPI SBAIJ matrices */
/* Build an (empty) MUMPS factor matrix B for a Seq/MPI SBAIJ matrix A; SBAIJ
   input implies Cholesky — no LU branch exists here. */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;
  PetscMPIInt    size;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  /* MUMPS cannot Cholesky-factor complex Hermitian (non-symmetric) matrices; fail early */
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  /* pick the SBAIJ -> MUMPS triples converter matching seq/parallel layout */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* compose the MUMPS-specific query/option routines so the MatMumpsXxx() and
     MatFactorXxx() front ends can dispatch to them */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;  /* complex: always general symmetric (SYM=2) */
#else
  /* SYM=1 (symmetric positive definite) only when the user declared A SPD */
  if (A->spd_set && A->spd) mumps->sym = 1;
  else                      mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
  if (size == 1) {
    /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
    B->canuseordering = PETSC_TRUE;
  }
  ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_CHOLESKY]);CHKERRQ(ierr);
  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  /* start the MUMPS instance (JOB=-1) and set default controls */
  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3147 
3148 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
3149 {
3150   Mat            B;
3151   PetscErrorCode ierr;
3152   Mat_MUMPS      *mumps;
3153   PetscBool      isSeqBAIJ;
3154   PetscMPIInt    size;
3155 
3156   PetscFunctionBegin;
3157   /* Create the factorization matrix */
3158   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
3159   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3160   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3161   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3162   ierr = MatSetUp(B);CHKERRQ(ierr);
3163 
3164   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3165   if (ftype == MAT_FACTOR_LU) {
3166     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
3167     B->factortype            = MAT_FACTOR_LU;
3168     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
3169     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
3170     mumps->sym = 0;
3171     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3172   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead");
3173 
3174   B->ops->view        = MatView_MUMPS;
3175   B->ops->getinfo     = MatGetInfo_MUMPS;
3176 
3177   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3178   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3180   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3181   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3183   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3184   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3185   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3186   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3187   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3188   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
3189   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
3190 
3191   /* set solvertype */
3192   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3193   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3194   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3195   if (size == 1) {
3196     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */
3197     B->canuseordering = PETSC_TRUE;
3198   }
3199   B->ops->destroy = MatDestroy_MUMPS;
3200   B->data         = (void*)mumps;
3201 
3202   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3203 
3204   *F = B;
3205   PetscFunctionReturn(0);
3206 }
3207 
3208 /* MatGetFactor for Seq and MPI SELL matrices */
3209 static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
3210 {
3211   Mat            B;
3212   PetscErrorCode ierr;
3213   Mat_MUMPS      *mumps;
3214   PetscBool      isSeqSELL;
3215   PetscMPIInt    size;
3216 
3217   PetscFunctionBegin;
3218   /* Create the factorization matrix */
3219   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
3220   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
3221   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
3222   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
3223   ierr = MatSetUp(B);CHKERRQ(ierr);
3224 
3225   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
3226 
3227   B->ops->view        = MatView_MUMPS;
3228   B->ops->getinfo     = MatGetInfo_MUMPS;
3229 
3230   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
3231   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
3232   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
3233   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
3234   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
3235   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
3236   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
3237   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
3238   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
3239   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
3240   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
3241 
3242   if (ftype == MAT_FACTOR_LU) {
3243     B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
3244     B->factortype            = MAT_FACTOR_LU;
3245     if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
3246     else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3247     mumps->sym = 0;
3248     ierr = PetscStrallocpy(MATORDERINGEXTERNAL,(char**)&B->preferredordering[MAT_FACTOR_LU]);CHKERRQ(ierr);
3249   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
3250 
3251   /* set solvertype */
3252   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
3253   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
3254   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
3255   if (size == 1) {
3256     /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization  */
3257     B->canuseordering = PETSC_TRUE;
3258   }
3259   B->ops->destroy = MatDestroy_MUMPS;
3260   B->data         = (void*)mumps;
3261 
3262   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
3263 
3264   *F = B;
3265   PetscFunctionReturn(0);
3266 }
3267 
3268 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3269 {
3270   PetscErrorCode ierr;
3271 
3272   PetscFunctionBegin;
3273   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3274   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3275   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3276   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3277   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3278   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3279   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3280   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3281   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3282   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3283   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
3284   PetscFunctionReturn(0);
3285 }
3286 
3287