xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 445ca0904fedcd9c19e8b5d649a2e70cfb1cd372)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 #include <petscpkg_version.h>
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for
47    number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
48    naming convention in PetscMPIInt, PetscBLASInt etc.
49 */
50 typedef MUMPS_INT PetscMUMPSInt;
51 
52 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
53   #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */
54     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
55   #endif
56 #else
57   #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */
58     #error "Petsc has not been tested with full 64-bit MUMPS and we choose to error out"
59   #endif
60 #endif
61 
62 #define MPIU_MUMPSINT             MPI_INT
63 #define PETSC_MUMPS_INT_MAX       2147483647
64 #define PETSC_MUMPS_INT_MIN       -2147483648
65 
66 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/
67 PETSC_STATIC_INLINE PetscErrorCode PetscMUMPSIntCast(PetscInt a,PetscMUMPSInt *b)
68 {
69   PetscFunctionBegin;
70   if (PetscDefined(USE_64BIT_INDICES) && PetscUnlikelyDebug(a > PETSC_MUMPS_INT_MAX || a < PETSC_MUMPS_INT_MIN)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
71   *b = (PetscMUMPSInt)(a);
72   PetscFunctionReturn(0);
73 }
74 
75 /* Put these utility routines here since they are only used in this file */
76 PETSC_STATIC_INLINE PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems *PetscOptionsObject,const char opt[],const char text[],const char man[],PetscMUMPSInt currentvalue,PetscMUMPSInt *value,PetscBool *set,PetscMUMPSInt lb,PetscMUMPSInt ub)
77 {
78   PetscErrorCode ierr;
79   PetscInt       myval;
80   PetscBool      myset;
81   PetscFunctionBegin;
82   /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */
83   ierr = PetscOptionsInt_Private(PetscOptionsObject,opt,text,man,(PetscInt)currentvalue,&myval,&myset,lb,ub);CHKERRQ(ierr);
84   if (myset) {ierr = PetscMUMPSIntCast(myval,value);CHKERRQ(ierr);}
85   if (set) *set = myset;
86   PetscFunctionReturn(0);
87 }
88 #define PetscOptionsMUMPSInt(a,b,c,d,e,f) PetscOptionsMUMPSInt_Private(PetscOptionsObject,a,b,c,d,e,f,PETSC_MUMPS_INT_MIN,PETSC_MUMPS_INT_MAX)
89 
90 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
91 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
92 #define PetscMUMPS_c(mumps) \
93   do { \
94     if (mumps->use_petsc_omp_support) { \
95       if (mumps->is_omp_master) { \
96         ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
97         MUMPS_c(&mumps->id); \
98         ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
99       } \
100       ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
101       /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
102          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
103          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
104          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
105       */ \
106       ierr = MPI_Bcast(mumps->id.infog, 40,MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr);  \
107       ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,     0,mumps->omp_comm);CHKERRQ(ierr); \
108       ierr = MPI_Bcast(mumps->id.info,  1, MPIU_MUMPSINT, 0,mumps->omp_comm);CHKERRQ(ierr);  \
109     } else { \
110       MUMPS_c(&mumps->id); \
111     } \
112   } while(0)
113 #else
114 #define PetscMUMPS_c(mumps) \
115   do { MUMPS_c(&mumps->id); } while (0)
116 #endif
117 
118 /* declare MumpsScalar */
119 #if defined(PETSC_USE_COMPLEX)
120 #if defined(PETSC_USE_REAL_SINGLE)
121 #define MumpsScalar mumps_complex
122 #else
123 #define MumpsScalar mumps_double_complex
124 #endif
125 #else
126 #define MumpsScalar PetscScalar
127 #endif
128 
129 /* macros s.t. indices match MUMPS documentation */
130 #define ICNTL(I) icntl[(I)-1]
131 #define CNTL(I) cntl[(I)-1]
132 #define INFOG(I) infog[(I)-1]
133 #define INFO(I) info[(I)-1]
134 #define RINFOG(I) rinfog[(I)-1]
135 #define RINFO(I) rinfo[(I)-1]
136 
/* Solver context stored in the factored Mat: wraps the precision-dependent
   MUMPS "id" control structure together with the (i,j,v) triplet form of the
   PETSc matrix and bookkeeping for solves, Schur complement and OpenMP support. */
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;                    /* MUMPS control/data structure; type matches PETSc's scalar precision */
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure   matstruc;              /* nonzero-structure state carried between factorizations */
  PetscMPIInt    myid,petsc_size;       /* rank and size on the PETSc matrix's communicator */
  PetscMUMPSInt  *irn,*jcn;             /* the (i,j,v) triplets passed to mumps. */
  PetscScalar    *val,*val_alloc;       /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */
  PetscInt64     nnz;                   /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;                   /* MUMPS SYM parameter (symmetry of the matrix; see MUMPS manual) */
  MPI_Comm       mumps_comm;            /* communicator MUMPS is invoked on */
  PetscMUMPSInt  ICNTL9_pre;            /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter     scat_rhs, scat_sol;    /* used by MatSolve() */
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
  PetscMUMPSInt  lrhs_loc,nloc_rhs,*irhs_loc;   /* distributed RHS support (MUMPS >= 5.3) */
  #if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt       *rhs_nrow,max_nrhs;
  PetscMPIInt    *rhs_recvcounts,*rhs_disps;    /* gather parameters for assembling RHS on omp masters */
  PetscScalar    *rhs_loc,*rhs_recvbuf;
  #endif
#endif
  Vec            b_seq,x_seq;           /* sequential work vectors, used by MatSolve() paths */
  PetscInt       ninfo,*info;           /* which INFO to display */
  PetscInt       sizeredrhs;            /* allocated length of id.redrhs */
  PetscScalar    *schur_sol;            /* buffer for the Schur complement solution */
  PetscInt       schur_sizesol;         /* allocated length of schur_sol */
  PetscMUMPSInt  *ia_alloc,*ja_alloc;   /* work arrays used for the CSR struct for sparse rhs */
  PetscInt64     cur_ilen,cur_jlen;     /* current len of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat,PetscInt,MatReuse,Mat_MUMPS*);  /* matrix-type-specific triplet converter */

  /* stuff used by petsc/mumps OpenMP support*/
  PetscBool      use_petsc_omp_support;
  PetscOmpCtrl   omp_ctrl;              /* an OpenMP controller so that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm       petsc_comm,omp_comm;   /* petsc_comm is petsc matrix's comm */
  PetscInt64     *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt    tag,omp_comm_size;
  PetscBool      is_omp_master;         /* is this rank the master of omp_comm */
  MPI_Request    *reqs;                 /* outstanding nonblocking requests for the OpenMP gather path */
};
188 
/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices.

   With 32-bit PETSc indices the input arrays are returned directly (no copy).
   With 64-bit indices the values are range-checked and copied into the
   ia_alloc/ja_alloc work buffers, which grow monotonically and are reused
   across calls (freed only in MatDestroy).
 */
static PetscErrorCode PetscMUMPSIntCSRCast(Mat_MUMPS *mumps,PetscInt nrow,PetscInt *ia,PetscInt *ja,PetscMUMPSInt **ia_mumps,PetscMUMPSInt **ja_mumps,PetscMUMPSInt *nnz_mumps)
{
  PetscErrorCode ierr;
  PetscInt       nnz=ia[nrow]-1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscInt64 since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow+1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nrow+1,&mumps->ia_alloc);CHKERRQ(ierr);
      mumps->cur_ilen = nrow+1;
    }
    if (nnz > mumps->cur_jlen) {
      ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
      ierr = PetscMalloc1(nnz,&mumps->ja_alloc);CHKERRQ(ierr);
      mumps->cur_jlen = nnz;
    }
    /* element-wise narrowing casts, with overflow checks in debug builds */
    for (i=0; i<nrow+1; i++) {ierr = PetscMUMPSIntCast(ia[i],&(mumps->ia_alloc[i]));CHKERRQ(ierr);}
    for (i=0; i<nnz; i++)    {ierr = PetscMUMPSIntCast(ja[i],&(mumps->ja_alloc[i]));CHKERRQ(ierr);}
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  ierr = PetscMUMPSIntCast(nnz,nnz_mumps);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
223 
224 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
225 {
226   PetscErrorCode ierr;
227 
228   PetscFunctionBegin;
229   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
230   ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
231   ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
232   mumps->id.size_schur = 0;
233   mumps->id.schur_lld  = 0;
234   mumps->id.ICNTL(19)  = 0;
235   PetscFunctionReturn(0);
236 }
237 
/* Solve with the Schur complement: the right-hand sides are taken from
   mumps->id.redrhs and the solution is returned in the same location.

   If the Schur complement S is (or gets) factored, triangular solves are used;
   if S has been explicitly inverted, the solve becomes a multiplication by the
   stored inverse (into the schur_sol buffer, then copied back into redrhs).
   ICNTL(9)==0 selects the transpose solve, matching MUMPS conventions. */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B is a dense wrapper around redrhs (no copy); its type must match S so the solves/products below work */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X also wraps redrhs, so the triangular solve writes the solution in place */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds inv(S): solve by multiplication; needs a separate output buffer since B aliases redrhs */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    ierr = MatProductCreateWithMat(S,B,NULL,X);CHKERRQ(ierr);
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatProductSetType(X,MATPRODUCT_AtB);CHKERRQ(ierr);
    } else {
      ierr = MatProductSetType(X,MATPRODUCT_AB);CHKERRQ(ierr);
    }
    ierr = MatProductSetFromOptions(X);CHKERRQ(ierr);
    ierr = MatProductSymbolic(X);CHKERRQ(ierr);
    ierr = MatProductNumeric(X);CHKERRQ(ierr);

    /* copy the solution back into redrhs (B aliases it) */
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
301 
/* Drive MUMPS's reduction/condensation (ICNTL(26)=1) and expansion
   (ICNTL(26)=2) phases around a Schur complement solve.

   expansion==PETSC_FALSE: prepare for the condensation step (allocate the
   reduced right-hand side storage id.redrhs and set ICNTL(26)=1); the caller
   then runs the MUMPS solve. expansion==PETSC_TRUE: solve the Schur system
   ourselves (MUMPS leaves that to the user), then run the MUMPS expansion
   phase and restore defaults. No-op when no Schur complement was requested. */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur; /* leading dimension of redrhs */
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
338 
339 /*
340   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
341 
342   input:
343     A       - matrix in aij,baij or sbaij format
344     shift   - 0: C style output triple; 1: Fortran style output triple.
345     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
346               MAT_REUSE_MATRIX:   only the values in v array are updated
347   output:
348     nnz     - dim of r, c, and v (number of local nonzero entries of A)
349     r, c, v - row and col index, matrix values (matrix triples)
350 
351   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
352   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
353   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
354 
355  */
356 
357 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
358 {
359   const PetscScalar *av;
360   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
361   PetscInt64        nz,rnz,i,j,k;
362   PetscErrorCode    ierr;
363   PetscMUMPSInt     *row,*col;
364   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
365 
366   PetscFunctionBegin;
367   ierr       = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
368   mumps->val = (PetscScalar*)av;
369   if (reuse == MAT_INITIAL_MATRIX) {
370     nz   = aa->nz;
371     ai   = aa->i;
372     aj   = aa->j;
373     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
374     for (i=k=0; i<M; i++) {
375       rnz = ai[i+1] - ai[i];
376       ajj = aj + ai[i];
377       for (j=0; j<rnz; j++) {
378         ierr = PetscMUMPSIntCast(i+shift,&row[k]);CHKERRQ(ierr);
379         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[k]);CHKERRQ(ierr);
380         k++;
381       }
382     }
383     mumps->irn = row;
384     mumps->jcn = col;
385     mumps->nnz = nz;
386   }
387   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
388   PetscFunctionReturn(0);
389 }
390 
/* Convert a SeqSELL matrix to COO triplets for MUMPS.
   Values are used in place (mumps->val aliases a->val, the sliced-ELLPACK
   value array, which includes slice padding); row/col index arrays are built
   only on MAT_INITIAL_MATRIX.
   NOTE(review): the row computation 8*i+r with r cycling via (r+1)&0x07
   assumes a fixed slice height of 8 rows -- confirm against Mat_SeqSELL. */
PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt64     nz,i,j,k,r;
  Mat_SeqSELL    *a=(Mat_SeqSELL*)A->data;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  mumps->val = a->val;
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = a->sliidx[a->totalslices]; /* total number of stored entries across all slices */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    /* entries within slice i are interleaved over its rows; r is the row offset inside the slice */
    for (i=k=0; i<a->totalslices; i++) {
      for (j=a->sliidx[i],r=0; j<a->sliidx[i+1]; j++,r=((r+1)&0x07)) {
        ierr = PetscMUMPSIntCast(8*i+r+shift,&row[k++]);CHKERRQ(ierr);
      }
    }
    /* column indices come straight from the SELL colidx array */
    for (i=0;i<nz;i++) {ierr = PetscMUMPSIntCast(a->colidx[i]+shift,&col[i]);CHKERRQ(ierr);}
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
415 
/* Convert a SeqBAIJ matrix to COO triplets for MUMPS, expanding each bs x bs
   block into bs2 scalar entries. Values are used in place (mumps->val aliases
   aa->a), so the traversal order of the index arrays must match the storage
   order of the block values: for each block, column offset j outer and row
   offset m inner (column-major within a block, as the idx/row/col expressions
   show). shift is 0 for C-style or 1 for Fortran-style indices. */
PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
  const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
  PetscInt64     M,nz,idx=0,rnz,i,j,k,m;
  PetscInt       bs;
  PetscErrorCode ierr;
  PetscMUMPSInt  *row,*col;

  PetscFunctionBegin;
  ierr       = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  M          = A->rmap->N/bs;           /* number of block rows (sequential matrix, so N is the local size too) */
  mumps->val = aa->a;                   /* values used in place; nothing to do on MAT_REUSE_MATRIX */
  if (reuse == MAT_INITIAL_MATRIX) {
    ai   = aa->i; aj = aa->j;
    nz   = bs2*aa->nz;                  /* every block contributes bs2 scalar entries */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    for (i=0; i<M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i+1] - ai[i];
      for (k=0; k<rnz; k++) {
        for (j=0; j<bs; j++) {          /* column within the block */
          for (m=0; m<bs; m++) {        /* row within the block */
            ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[idx]);CHKERRQ(ierr);
            ierr = PetscMUMPSIntCast(bs*ajj[k] + j + shift,&col[idx]);CHKERRQ(ierr);
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  }
  PetscFunctionReturn(0);
}
452 
453 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
454 {
455   const PetscInt *ai, *aj,*ajj;
456   PetscInt        bs;
457   PetscInt64      nz,rnz,i,j,k,m;
458   PetscErrorCode  ierr;
459   PetscMUMPSInt   *row,*col;
460   PetscScalar     *val;
461   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
462   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
463 #if defined(PETSC_USE_COMPLEX)
464   PetscBool       hermitian;
465 #endif
466 
467   PetscFunctionBegin;
468 #if defined(PETSC_USE_COMPLEX)
469   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
470   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
471 #endif
472   ai   = aa->i;
473   aj   = aa->j;
474   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
475   if (reuse == MAT_INITIAL_MATRIX) {
476     nz   = aa->nz;
477     ierr = PetscMalloc2(bs2*nz,&row,bs2*nz,&col);CHKERRQ(ierr);
478     if (bs>1) {
479       ierr       = PetscMalloc1(bs2*nz,&mumps->val_alloc);CHKERRQ(ierr);
480       mumps->val = mumps->val_alloc;
481     } else {
482       mumps->val = aa->a;
483     }
484     mumps->irn = row;
485     mumps->jcn = col;
486   } else {
487     if (bs == 1) mumps->val = aa->a;
488     row = mumps->irn;
489     col = mumps->jcn;
490   }
491   val = mumps->val;
492 
493   nz = 0;
494   if (bs>1) {
495     for (i=0; i<mbs; i++) {
496       rnz = ai[i+1] - ai[i];
497       ajj = aj + ai[i];
498       for (j=0; j<rnz; j++) {
499         for (k=0; k<bs; k++) {
500           for (m=0; m<bs; m++) {
501             if (ajj[j]>i || k>=m) {
502               if (reuse == MAT_INITIAL_MATRIX) {
503                 ierr = PetscMUMPSIntCast(i*bs + m + shift,&row[nz]);CHKERRQ(ierr);
504                 ierr = PetscMUMPSIntCast(ajj[j]*bs + k + shift,&col[nz]);CHKERRQ(ierr);
505               }
506               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
507             }
508           }
509         }
510       }
511     }
512   } else if (reuse == MAT_INITIAL_MATRIX) {
513     for (i=0; i<mbs; i++) {
514       rnz = ai[i+1] - ai[i];
515       ajj = aj + ai[i];
516       for (j=0; j<rnz; j++) {
517         ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
518         ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
519         nz++;
520       }
521     }
522     if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
523   }
524   if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
525   PetscFunctionReturn(0);
526 }
527 
528 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
529 {
530   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
531   PetscInt64        nz,rnz,i,j;
532   const PetscScalar *av,*v1;
533   PetscScalar       *val;
534   PetscErrorCode    ierr;
535   PetscMUMPSInt     *row,*col;
536   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
537   PetscBool         missing;
538 #if defined(PETSC_USE_COMPLEX)
539   PetscBool         hermitian;
540 #endif
541 
542   PetscFunctionBegin;
543 #if defined(PETSC_USE_COMPLEX)
544   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
545   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
546 #endif
547   ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
548   ai    = aa->i; aj = aa->j;
549   adiag = aa->diag;
550   ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,NULL);CHKERRQ(ierr);
551   if (reuse == MAT_INITIAL_MATRIX) {
552     /* count nz in the upper triangular part of A */
553     nz = 0;
554     if (missing) {
555       for (i=0; i<M; i++) {
556         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
557           for (j=ai[i];j<ai[i+1];j++) {
558             if (aj[j] < i) continue;
559             nz++;
560           }
561         } else {
562           nz += ai[i+1] - adiag[i];
563         }
564       }
565     } else {
566       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
567     }
568     ierr       = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
569     ierr       = PetscMalloc1(nz,&val);CHKERRQ(ierr);
570     mumps->nnz = nz;
571     mumps->irn = row;
572     mumps->jcn = col;
573     mumps->val = mumps->val_alloc = val;
574 
575     nz = 0;
576     if (missing) {
577       for (i=0; i<M; i++) {
578         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
579           for (j=ai[i];j<ai[i+1];j++) {
580             if (aj[j] < i) continue;
581             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
582             ierr = PetscMUMPSIntCast(aj[j]+shift,&col[nz]);CHKERRQ(ierr);
583             val[nz] = av[j];
584             nz++;
585           }
586         } else {
587           rnz = ai[i+1] - adiag[i];
588           ajj = aj + adiag[i];
589           v1  = av + adiag[i];
590           for (j=0; j<rnz; j++) {
591             ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
592             ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
593             val[nz++] = v1[j];
594           }
595         }
596       }
597     } else {
598       for (i=0; i<M; i++) {
599         rnz = ai[i+1] - adiag[i];
600         ajj = aj + adiag[i];
601         v1  = av + adiag[i];
602         for (j=0; j<rnz; j++) {
603           ierr = PetscMUMPSIntCast(i+shift,&row[nz]);CHKERRQ(ierr);
604           ierr = PetscMUMPSIntCast(ajj[j] + shift,&col[nz]);CHKERRQ(ierr);
605           val[nz++] = v1[j];
606         }
607       }
608     }
609   } else {
610     nz = 0;
611     val = mumps->val;
612     if (missing) {
613       for (i=0; i <M; i++) {
614         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
615           for (j=ai[i];j<ai[i+1];j++) {
616             if (aj[j] < i) continue;
617             val[nz++] = av[j];
618           }
619         } else {
620           rnz = ai[i+1] - adiag[i];
621           v1  = av + adiag[i];
622           for (j=0; j<rnz; j++) {
623             val[nz++] = v1[j];
624           }
625         }
626       }
627     } else {
628       for (i=0; i <M; i++) {
629         rnz = ai[i+1] - adiag[i];
630         v1  = av + adiag[i];
631         for (j=0; j<rnz; j++) {
632           val[nz++] = v1[j];
633         }
634       }
635     }
636   }
637   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
638   PetscFunctionReturn(0);
639 }
640 
641 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
642 {
643   PetscErrorCode    ierr;
644   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
645   PetscInt          bs;
646   PetscInt64        rstart,nz,i,j,k,m,jj,irow,countA,countB;
647   PetscMUMPSInt     *row,*col;
648   const PetscScalar *av,*bv,*v1,*v2;
649   PetscScalar       *val;
650   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
651   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
652   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
653   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
654 #if defined(PETSC_USE_COMPLEX)
655   PetscBool         hermitian;
656 #endif
657 
658   PetscFunctionBegin;
659 #if defined(PETSC_USE_COMPLEX)
660   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
661   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
662 #endif
663   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
664   rstart = A->rmap->rstart;
665   ai = aa->i;
666   aj = aa->j;
667   bi = bb->i;
668   bj = bb->j;
669   av = aa->a;
670   bv = bb->a;
671 
672   garray = mat->garray;
673 
674   if (reuse == MAT_INITIAL_MATRIX) {
675     nz   = (aa->nz+bb->nz)*bs2; /* just a conservative estimate */
676     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
677     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
678     /* can not decide the exact mumps->nnz now because of the SBAIJ */
679     mumps->irn = row;
680     mumps->jcn = col;
681     mumps->val = mumps->val_alloc = val;
682   } else {
683     val = mumps->val;
684   }
685 
686   jj = 0; irow = rstart;
687   for (i=0; i<mbs; i++) {
688     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
689     countA = ai[i+1] - ai[i];
690     countB = bi[i+1] - bi[i];
691     bjj    = bj + bi[i];
692     v1     = av + ai[i]*bs2;
693     v2     = bv + bi[i]*bs2;
694 
695     if (bs>1) {
696       /* A-part */
697       for (j=0; j<countA; j++) {
698         for (k=0; k<bs; k++) {
699           for (m=0; m<bs; m++) {
700             if (rstart + ajj[j]*bs>irow || k>=m) {
701               if (reuse == MAT_INITIAL_MATRIX) {
702                 ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
703                 ierr = PetscMUMPSIntCast(rstart + ajj[j]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
704               }
705               val[jj++] = v1[j*bs2 + m + k*bs];
706             }
707           }
708         }
709       }
710 
711       /* B-part */
712       for (j=0; j < countB; j++) {
713         for (k=0; k<bs; k++) {
714           for (m=0; m<bs; m++) {
715             if (reuse == MAT_INITIAL_MATRIX) {
716               ierr = PetscMUMPSIntCast(irow + m + shift,&row[jj]);CHKERRQ(ierr);
717               ierr = PetscMUMPSIntCast(garray[bjj[j]]*bs + k + shift,&col[jj]);CHKERRQ(ierr);
718             }
719             val[jj++] = v2[j*bs2 + m + k*bs];
720           }
721         }
722       }
723     } else {
724       /* A-part */
725       for (j=0; j<countA; j++) {
726         if (reuse == MAT_INITIAL_MATRIX) {
727           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
728           ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
729         }
730         val[jj++] = v1[j];
731       }
732 
733       /* B-part */
734       for (j=0; j < countB; j++) {
735         if (reuse == MAT_INITIAL_MATRIX) {
736           ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
737           ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
738         }
739         val[jj++] = v2[j];
740       }
741     }
742     irow+=bs;
743   }
744   mumps->nnz = jj;
745   PetscFunctionReturn(0);
746 }
747 
/* Convert an MPIAIJ matrix to local COO triplets with GLOBAL indices for
   MUMPS. Each local row interleaves the diagonal block (Ad, columns offset by
   rstart) and the off-diagonal block (Ao, columns mapped through garray).
   Values are copied into a buffer; on MAT_REUSE_MATRIX only the values are
   refreshed, in the same traversal order. shift is 0 (C) or 1 (Fortran). */
PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa = (Mat_SeqAIJ*)(Ad)->data;
  bb = (Mat_SeqAIJ*)(Ao)->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart; /* first global row owned by this rank */

  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = (PetscInt64)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part: diagonal block, local column + rstart gives the global column */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part: off-diagonal block, columns mapped to global via garray */
    for (j=0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
818 
819 PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
820 {
821   Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
822   Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
823   Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
824   const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
825   const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
826   const PetscInt    bs2=mat->bs2;
827   PetscErrorCode    ierr;
828   PetscInt          bs;
829   PetscInt64        nz,i,j,k,n,jj,irow,countA,countB,idx;
830   PetscMUMPSInt     *row,*col;
831   const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
832   PetscScalar       *val;
833 
834   PetscFunctionBegin;
835   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
836   if (reuse == MAT_INITIAL_MATRIX) {
837     nz   = bs2*(aa->nz + bb->nz);
838     ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
839     ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
840     mumps->nnz = nz;
841     mumps->irn = row;
842     mumps->jcn = col;
843     mumps->val = mumps->val_alloc = val;
844   } else {
845     val = mumps->val;
846   }
847 
848   jj = 0; irow = rstart;
849   for (i=0; i<mbs; i++) {
850     countA = ai[i+1] - ai[i];
851     countB = bi[i+1] - bi[i];
852     ajj    = aj + ai[i];
853     bjj    = bj + bi[i];
854     v1     = av + bs2*ai[i];
855     v2     = bv + bs2*bi[i];
856 
857     idx = 0;
858     /* A-part */
859     for (k=0; k<countA; k++) {
860       for (j=0; j<bs; j++) {
861         for (n=0; n<bs; n++) {
862           if (reuse == MAT_INITIAL_MATRIX) {
863             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
864             ierr = PetscMUMPSIntCast(rstart + bs*ajj[k] + j + shift,&col[jj]);CHKERRQ(ierr);
865           }
866           val[jj++] = v1[idx++];
867         }
868       }
869     }
870 
871     idx = 0;
872     /* B-part */
873     for (k=0; k<countB; k++) {
874       for (j=0; j<bs; j++) {
875         for (n=0; n<bs; n++) {
876           if (reuse == MAT_INITIAL_MATRIX) {
877             ierr = PetscMUMPSIntCast(irow + n + shift,&row[jj]);CHKERRQ(ierr);
878             ierr = PetscMUMPSIntCast(bs*garray[bjj[k]] + j + shift,&col[jj]);CHKERRQ(ierr);
879           }
880           val[jj++] = v2[idx++];
881         }
882       }
883     }
884     irow += bs;
885   }
886   PetscFunctionReturn(0);
887 }
888 
/*
  Convert an MPIAIJ matrix into COO triples holding ONLY the upper-triangular part
  (diagonal included), as required when feeding a symmetric matrix to MUMPS Cholesky.
  Indices are offset by <shift> (1 for MUMPS' Fortran-style indexing).

  reuse == MAT_INITIAL_MATRIX: count the upper-triangular entries, allocate irn/jcn/val
                               and cache them in <mumps>.
  otherwise:                   refill only the values; the sparsity pattern must be unchanged.
*/
PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,PetscInt shift,MatReuse reuse,Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
  PetscErrorCode    ierr;
  PetscInt64        rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
  PetscMUMPSInt     *row,*col;
  const PetscScalar *av, *bv,*v1,*v2;
  PetscScalar       *val;
  Mat               Ad,Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool         hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  /* MUMPS' symmetric (LDL^T) factorization assumes A = A^T, not A = A^H */
  ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
  if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
#endif
  ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr); /* Ad: diagonal block, Ao: off-diagonal block */
  ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);

  aa    = (Mat_SeqAIJ*)(Ad)->data;
  bb    = (Mat_SeqAIJ*)(Ao)->data;
  ai    = aa->i;
  aj    = aa->j;
  adiag = aa->diag; /* adiag[i]: position of the diagonal entry in row i of Ad */
  bi    = bb->i;
  bj    = bb->j;

  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0;    /* num of upper triangular entries in mat->B */
    for (i=0; i<m; i++) {
      nza   += (ai[i+1] - adiag[i]);
      countB = bi[i+1] - bi[i];
      bjj    = bj + bi[i];
      for (j=0; j<countB; j++) {
        /* cols of Ao lie outside [rstart,rend); those > rstart are >= rend, hence above the diagonal */
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz   = nza + nzb; /* total nz of upper triangular part of mat */
    ierr = PetscMalloc2(nz,&row,nz,&col);CHKERRQ(ierr);
    ierr = PetscMalloc1(nz,&val);CHKERRQ(ierr);
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj = 0; irow = rstart;
  for (i=0; i<m; i++) {
    ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i+1] - adiag[i];            /* entries on/after the diagonal in row i of Ad */
    countB = bi[i+1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j=0; j<countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
        ierr = PetscMUMPSIntCast(rstart + ajj[j] + shift,&col[jj]);CHKERRQ(ierr);
      }
      val[jj++] = v1[j];
    }

    /* B-part: same filter as in the counting pass above, so jj matches mumps->nnz */
    for (j=0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          ierr = PetscMUMPSIntCast(irow + shift,&row[jj]);CHKERRQ(ierr);
          ierr = PetscMUMPSIntCast(garray[bjj[j]] + shift,&col[jj]);CHKERRQ(ierr);
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
  ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
980 
/*
  Destroy a MUMPS factor matrix: free all PETSc-side buffers/scatters, terminate the
  MUMPS instance (JOB_END), and clear the composed function pointers.
*/
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  /* shut down this MUMPS instance; check its global status afterwards */
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr);
    #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
    /* buffers allocated lazily by MatMumpsSetUpDistRHSInfo() for the distributed-RHS path */
    ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
    ierr = PetscFree3(mumps->rhs_nrow,mumps->rhs_recvcounts,mumps->rhs_disps);CHKERRQ(ierr);
    #endif
  }
#endif
  ierr = PetscFree(mumps->ia_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->ja_alloc);CHKERRQ(ierr);
  ierr = PetscFree(mumps->recvcount);CHKERRQ(ierr);
  ierr = PetscFree(mumps->reqs);CHKERRQ(ierr);
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
  ierr = PetscFree(mumps->irhs_loc);CHKERRQ(ierr);
#endif
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1034 
1035 #if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
1036 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A,PetscInt nrhs,const PetscScalar *array)
{
  PetscErrorCode     ierr;
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
  const PetscMPIInt  ompsize=mumps->omp_comm_size;
  PetscInt           i,m,M,rstart;

  PetscFunctionBegin;
  ierr = MatGetSize(A,&M,NULL);CHKERRQ(ierr);
  ierr = MatGetLocalSize(A,&m,NULL);CHKERRQ(ierr);
  /* irhs_loc entries are PetscMUMPSInt; global indices must fit */
  if (M > PETSC_MUMPS_INT_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    /* no OpenMP aggregation: each rank hands MUMPS its own rows of the RHS directly */
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = m;
      ierr = PetscMalloc1(m,&mumps->irhs_loc);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&rstart,NULL);CHKERRQ(ierr);
      for (i=0; i<m; i++) mumps->irhs_loc[i] = rstart+i+1; /* use 1-based indices */
    }
    mumps->id.rhs_loc = (MumpsScalar*)array;
  } else {
  #if defined(PETSC_HAVE_OPENMP_SUPPORT)
    /* RHS rows of all ranks in the omp communicator are gathered onto the omp master,
       which is the only rank in this group that talks to MUMPS */
    const PetscInt  *ranges;
    PetscMPIInt     j,k,sendcount,*petsc_ranks,*omp_ranks;
    MPI_Group       petsc_group,omp_group;
    PetscScalar     *recvbuf=NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        ierr = PetscMalloc2(ompsize,&omp_ranks,ompsize,&petsc_ranks);CHKERRQ(ierr);
        ierr = PetscMalloc3(ompsize,&mumps->rhs_nrow,ompsize,&mumps->rhs_recvcounts,ompsize,&mumps->rhs_disps);CHKERRQ(ierr);
        ierr = MPI_Comm_group(mumps->petsc_comm,&petsc_group);CHKERRQ(ierr);
        ierr = MPI_Comm_group(mumps->omp_comm,&omp_group);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) omp_ranks[j] = j;
        /* map omp-comm ranks to their ranks in the petsc comm, to look up row ownership */
        ierr = MPI_Group_translate_ranks(omp_group,ompsize,omp_ranks,petsc_group,petsc_ranks);CHKERRQ(ierr);

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        ierr = MatGetOwnershipRanges(A,&ranges);CHKERRQ(ierr);
        for (j=0; j<ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j]+1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs   += mumps->rhs_nrow[j];
        }
        ierr = PetscMalloc1(mumps->nloc_rhs,&mumps->irhs_loc);CHKERRQ(ierr);
        for (j=k=0; j<ompsize; j++) {
          for (i=ranges[petsc_ranks[j]]; i<ranges[petsc_ranks[j]+1]; i++,k++) mumps->irhs_loc[k] = i+1; /* uses 1-based indices */
        }

        ierr = PetscFree2(omp_ranks,petsc_ranks);CHKERRQ(ierr);
        ierr = MPI_Group_free(&petsc_group);CHKERRQ(ierr);
        ierr = MPI_Group_free(&omp_group);CHKERRQ(ierr);
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        ierr = PetscFree2(mumps->rhs_loc,mumps->rhs_recvbuf);CHKERRQ(ierr);
        ierr = PetscMalloc2(mumps->nloc_rhs*nrhs,&mumps->rhs_loc,mumps->nloc_rhs*nrhs,&mumps->rhs_recvbuf);CHKERRQ(ierr);
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j=0; j<ompsize; j++) {ierr = PetscMPIIntCast(mumps->rhs_nrow[j]*nrhs,&mumps->rhs_recvcounts[j]);CHKERRQ(ierr);}
      mumps->rhs_disps[0] = 0;
      for (j=1; j<ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j-1] + mumps->rhs_recvcounts[j-1];
        if (mumps->rhs_disps[j] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    ierr = PetscMPIIntCast(m*nrhs,&sendcount);CHKERRQ(ierr);
    ierr = MPI_Gatherv(array,sendcount,MPIU_SCALAR,recvbuf,mumps->rhs_recvcounts,mumps->rhs_disps,MPIU_SCALAR,0,mumps->omp_comm);CHKERRQ(ierr);

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst,*dstbase = mumps->rhs_loc;
        for (j=0; j<ompsize; j++) {
          /* sender j's contribution is nrhs column segments of rhs_nrow[j] rows each;
             interleave them so rhs_loc[] is column-major over all nloc_rhs rows */
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i=0; i<nrhs; i++) {
            ierr = PetscArraycpy(dst,src,mumps->rhs_nrow[j]);CHKERRQ(ierr);
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      mumps->id.rhs_loc = (MumpsScalar*)mumps->rhs_loc;
    }
  #endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = nrhs;
  mumps->id.nloc_rhs = mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(0);
}
1134 #endif /* PETSC_PKG_MUMPS_VERSION_GE(5,3,0) */
1135 
/*
  Solve A x = b using the MUMPS factors stored in A->data (or A^T x = b when
  MatSolveTranspose_MUMPS() has set ICNTL(9) = 0 before calling us).
  In parallel, how the RHS is handed to MUMPS depends on the MUMPS version:
  >= 5.3.0 uses the distributed dense RHS (ICNTL(20)=10); older versions require
  the RHS centralized on the host rank.
*/
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS          *mumps=(Mat_MUMPS*)A->data;
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
  const PetscScalar  *rarray;
#endif
  PetscScalar        *array;
  IS                 is_iden,is_petsc;
  PetscErrorCode     ierr;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* factorization failed earlier: flag the solution as invalid instead of calling MUMPS */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
    mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
    ierr = VecGetArrayRead(b,&rarray);CHKERRQ(ierr);
    ierr = MatMumpsSetUpDistRHSInfo(A,1,rarray);CHKERRQ(ierr);
#else
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a seqential rhs vector*/
    ierr = VecScatterBegin(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,mumps->b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {
      ierr = VecGetArray(mumps->b_seq,&array);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)array;
    }
#endif
  } else {  /* petsc_size == 1 */
    /* MUMPS overwrites the RHS with the solution in-place, so solve directly in x */
    mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc=NULL;
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      ierr = PetscMalloc1(mumps->id.lsol_loc,&isol2_loc);CHKERRQ(ierr);
      for (i=0; i<mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i]-1; /* change Fortran style to C style */
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,isol2_loc,PETSC_OWN_POINTER,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
    ierr = VecRestoreArrayRead(b,&rarray);CHKERRQ(ierr);
#else
    if (!mumps->myid) {ierr = VecRestoreArray(mumps->b_seq,&array);CHKERRQ(ierr);}
#endif
  } else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  /* RINFO(3) is the number of floating-point operations of the solve, per the MUMPS docs;
     factor 2 converts to PETSc's flop convention */
  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1236 
1237 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
1238 {
1239   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   mumps->id.ICNTL(9) = 0;
1244   ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
1245   mumps->id.ICNTL(9) = 1;
1246   PetscFunctionReturn(0);
1247 }
1248 
/*
  Solve A X = B for multiple right-hand sides. X must be dense; B is either dense or a
  MATTRANSPOSEMAT wrapper around an AIJ matrix (MUMPS wants sparse RHS in compressed
  COLUMN format, i.e. the CSR of the transpose -- see MatMatTransposeSolve_MUMPS()).
  Sequential and parallel paths differ substantially; in parallel the MUMPS solution is
  returned distributed (sol_loc/isol_loc) and scattered back into X.
*/
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL;
  PetscBool         denseX,denseB,flg,flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*idxx,iidx = 0;
  PetscMUMPSInt     *isol_loc,*isol_loc_save;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,msol_loc;
  VecScatter        scat_sol;
#if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
  Vec               b_seq;
  VecScatter        scat_rhs;
#endif
  PetscScalar       *aa;
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&denseX,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!denseX) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&denseB,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (denseB) {
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actural RHS matrix,
                 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M; /* leading dimension of the (centralized) dense RHS */
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    /* NOTE(review): these intentionally shadow the file-scope locals declared above,
       which are only used on the parallel path */
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    /* MUMPS overwrites the dense RHS with the solution in-place, so point it at X */
    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array;

    if (denseB) {
      /* copy B to X */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      /* hand Bt's CSR arrays to MUMPS as compressed-column sparse RHS (1-based via MatGetRowIJ shift) */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (!denseB) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (denseB) {
#if PETSC_PKG_MUMPS_VERSION_GE(5,3,0)
    mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
    ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
    ierr = MatMumpsSetUpDistRHSInfo(A,nrhs,rbray);CHKERRQ(ierr);
    ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    /* placeholder vector; its array is placed later to scatter the solution into X */
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,NULL,&v_mpi);CHKERRQ(ierr);
#else
    /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
       very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
       0, re-arrange B into desired order, which is a local operation.
     */

    /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
    /* wrap dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }
#endif
  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    /* MUMPS reads the sparse RHS only on the host rank */
    if (!mumps->myid) {
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* locate the owning rank of global row isol_loc[i] to compute its offset inside v_mpi */
    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  /* restore the single-RHS buffers so a later MatSolve() finds what it cached */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save;
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
#if PETSC_PKG_MUMPS_VERSION_LT(5,3,0)
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
#endif
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1497 
1498 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1499 {
1500   PetscErrorCode ierr;
1501   PetscBool      flg;
1502   Mat            B;
1503 
1504   PetscFunctionBegin;
1505   ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
1506   if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1507 
1508   /* Create B=Bt^T that uses Bt's data structure */
1509   ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
1510 
1511   ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
1512   ierr = MatDestroy(&B);CHKERRQ(ierr);
1513   PetscFunctionReturn(0);
1514 }
1515 
1516 #if !defined(PETSC_USE_COMPLEX)
1517 /*
1518   input:
1519    F:        numeric factor
1520   output:
1521    nneg:     total number of negative pivots
1522    nzero:    total number of zero pivots
1523    npos:     (global dimension of F) - nneg - nzero
1524 */
1525 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
1526 {
1527   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1528   PetscErrorCode ierr;
1529   PetscMPIInt    size;
1530 
1531   PetscFunctionBegin;
1532   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
1533   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1534   if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13));
1535 
1536   if (nneg) *nneg = mumps->id.INFOG(12);
1537   if (nzero || npos) {
1538     if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1539     if (nzero) *nzero = mumps->id.INFOG(28);
1540     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1541   }
1542   PetscFunctionReturn(0);
1543 }
1544 #endif
1545 
/*
  MatMumpsGatherNonzerosOnMaster - when PETSc OpenMP support is active (omp_comm_size > 1),
  gathers the COO triples (irn/jcn/val) from all ranks of mumps->omp_comm onto the omp master
  rank, which is the only rank that feeds MUMPS.

  reuse == MAT_INITIAL_MATRIX: gather the per-rank nonzero counts, allocate the master's
  enlarged irn/jcn/val arrays, and move all three arrays.
  reuse == MAT_REUSE_MATRIX: the sparsity pattern is unchanged, so only val is moved.

  Because MPI counts are 'int' while nnz is 64-bit, each message is sliced into chunks of at
  most PETSC_MPI_INT_MAX entries (see the comment below). No-op when osize == 1.
*/
PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       i,nreqs;
  PetscMUMPSInt  *irn,*jcn;
  PetscMPIInt    count;
  PetscInt64     totnnz,remain;
  const PetscInt osize=mumps->omp_comm_size;
  PetscScalar    *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) {ierr = PetscMalloc1(osize,&mumps->recvcount);CHKERRQ(ierr);}
      ierr = MPI_Gather(&mumps->nnz,1,MPIU_INT64,mumps->recvcount,1,MPIU_INT64,0/*master*/,mumps->omp_comm);CHKERRQ(ierr);

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (i=1; i<osize; i++) nreqs += (mumps->recvcount[i]+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      } else {
        nreqs = (mumps->nnz+PETSC_MPI_INT_MAX-1)/PETSC_MPI_INT_MAX;
      }
      ierr = PetscMalloc1(nreqs*3,&mumps->reqs);CHKERRQ(ierr); /* Triple the requests since we send irn, jcn and val seperately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        for (i=0,totnnz=0; i<osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        ierr = PetscMalloc2(totnnz,&irn,totnnz,&jcn);CHKERRQ(ierr);
        ierr = PetscMalloc1(totnnz,&val);CHKERRQ(ierr);

        /* Self communication */
        ierr = PetscArraycpy(irn,mumps->irn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(jcn,mumps->jcn,mumps->nnz);CHKERRQ(ierr);
        ierr = PetscArraycpy(val,mumps->val,mumps->nnz);CHKERRQ(ierr);

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        ierr = PetscFree2(mumps->irn,mumps->jcn);CHKERRQ(ierr);
        ierr = PetscFree(mumps->val_alloc);CHKERRQ(ierr);
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (i=1; i<osize; i++) {
          /* Receive rank i's data in chunks of at most PETSC_MPI_INT_MAX entries */
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(irn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            ierr    = MPI_Irecv(jcn,count,MPIU_MUMPSINT,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR,  i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            irn    += count;
            jcn    += count;
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        /* Non-master ranks send their whole triples, sliced the same way */
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr    = MPI_Isend(irn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          ierr    = MPI_Isend(jcn,count,MPIU_MUMPSINT,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          ierr    = MPI_Isend(val,count,MPIU_SCALAR,  0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          irn    += count;
          jcn    += count;
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      /* MAT_REUSE_MATRIX: pattern (irn/jcn) already on master; move only the values */
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (i=1; i<osize; i++) { /* Remote communication only since self data is already in place */
          count  = PetscMin(mumps->recvcount[i],PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count>0) {
            ierr    = MPI_Irecv(val,count,MPIU_SCALAR,i,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
            val    += count;
            count   = PetscMin(remain,PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = PetscMin(mumps->nnz,PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count>0) {
          ierr    = MPI_Isend(val,count,MPIU_SCALAR,0,mumps->tag,mumps->omp_comm,&mumps->reqs[nreqs++]);CHKERRQ(ierr);
          val    += count;
          count   = PetscMin(remain,PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    ierr = MPI_Waitall(nreqs,mumps->reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr);
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(0);
}
1664 
1665 PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
1666 {
1667   Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
1668   PetscErrorCode ierr;
1669   PetscBool      isMPIAIJ;
1670 
1671   PetscFunctionBegin;
1672   if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
1673     if (mumps->id.INFOG(1) == -6) {
1674       ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1675     }
1676     ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1677     PetscFunctionReturn(0);
1678   }
1679 
1680   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps);CHKERRQ(ierr);
1681   ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);
1682 
1683   /* numerical factorization phase */
1684   /*-------------------------------*/
1685   mumps->id.job = JOB_FACTNUMERIC;
1686   if (!mumps->id.ICNTL(18)) { /* A is centralized */
1687     if (!mumps->myid) {
1688       mumps->id.a = (MumpsScalar*)mumps->val;
1689     }
1690   } else {
1691     mumps->id.a_loc = (MumpsScalar*)mumps->val;
1692   }
1693   PetscMUMPS_c(mumps);
1694   if (mumps->id.INFOG(1) < 0) {
1695     if (A->erroriffailure) {
1696       SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
1697     } else {
1698       if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
1699         ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1700         F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
1701       } else if (mumps->id.INFOG(1) == -13) {
1702         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1703         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1704       } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) {
1705         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1706         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1707       } else {
1708         ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1709         F->factorerrortype = MAT_FACTOR_OTHER;
1710       }
1711     }
1712   }
1713   if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16));
1714 
1715   F->assembled    = PETSC_TRUE;
1716   mumps->matstruc = SAME_NONZERO_PATTERN;
1717   if (F->schur) { /* reset Schur status to unfactored */
1718 #if defined(PETSC_HAVE_CUDA)
1719     F->schur->offloadmask = PETSC_OFFLOAD_CPU;
1720 #endif
1721     if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
1722       mumps->id.ICNTL(19) = 2;
1723       ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
1724     }
1725     ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
1726   }
1727 
1728   /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
1729   if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;
1730 
1731   if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
1732   if (mumps->petsc_size > 1) {
1733     PetscInt    lsol_loc;
1734     PetscScalar *sol_loc;
1735 
1736     ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);
1737 
1738     /* distributed solution; Create x_seq=sol_loc for repeated use */
1739     if (mumps->x_seq) {
1740       ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
1741       ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
1742       ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
1743     }
1744     lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
1745     ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
1746     mumps->id.lsol_loc = lsol_loc;
1747     mumps->id.sol_loc = (MumpsScalar*)sol_loc;
1748     ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
1749   }
1750   ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 /* Sets MUMPS options from the options database */
1755 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1756 {
1757   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1758   PetscErrorCode ierr;
1759   PetscMUMPSInt  icntl=0;
1760   PetscInt       info[80],i,ninfo=80;
1761   PetscBool      flg=PETSC_FALSE;
1762 
1763   PetscFunctionBegin;
1764   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
1765   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
1766   if (flg) mumps->id.ICNTL(1) = icntl;
1767   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
1768   if (flg) mumps->id.ICNTL(2) = icntl;
1769   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
1770   if (flg) mumps->id.ICNTL(3) = icntl;
1771 
1772   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
1773   if (flg) mumps->id.ICNTL(4) = icntl;
1774   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
1775 
1776   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
1777   if (flg) mumps->id.ICNTL(6) = icntl;
1778 
1779   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
1780   if (flg) {
1781     if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
1782     else mumps->id.ICNTL(7) = icntl;
1783   }
1784 
1785   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
1786   /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */
1787   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
1788   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
1789   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
1790   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
1791   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
1792   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
1793   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1794     ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
1795     ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
1796   }
1797   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */
1798   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */
1799 
1800   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
1801   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
1802   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
1803   if (mumps->id.ICNTL(24)) {
1804     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1805   }
1806 
1807   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
1808   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
1809   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_27","ICNTL(27): controls the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
1810   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
1811   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
1812   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
1813   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
1814   /* ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
1815   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
1816   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
1817   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
1818   ierr = PetscOptionsMUMPSInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
1819 
1820   ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
1821   ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
1822   ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
1823   ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
1824   ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
1825   ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);
1826 
1827   ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL);CHKERRQ(ierr);
1828 
1829   ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
1830   if (ninfo) {
1831     if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo);
1832     ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
1833     mumps->ninfo = ninfo;
1834     for (i=0; i<ninfo; i++) {
1835       if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo);
1836       else  mumps->info[i] = info[i];
1837     }
1838   }
1839 
1840   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1841   PetscFunctionReturn(0);
1842 }
1843 
/*
  PetscInitializeMUMPS - one-time setup of a Mat_MUMPS context: establishes the PETSc/OpenMP
  communicators, calls MUMPS with JOB_INIT, broadcasts MUMPS' default control parameters to
  all ranks of omp_comm, and installs PETSc's default ICNTL overrides and Schur bookkeeping.
*/
PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */

  ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
  ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n");
#endif
  } else {
    /* no OpenMP support requested: every rank talks to MUMPS directly */
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr);
  mumps->reqs = NULL;
  mumps->tag  = 0;

  /* MUMPS is a Fortran library; it takes the communicator as a Fortran handle */
  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1;  /* host participates factorizaton and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
   */
  ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr);

  mumps->scat_rhs = NULL;
  mumps->scat_sol = NULL;

  /* set PETSc-MUMPS default options - override MUMPS default */
  mumps->id.ICNTL(3) = 0;
  mumps->id.ICNTL(4) = 0;
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
  } else {
    mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
    mumps->id.ICNTL(20) = 0;   /* rhs is in dense format */
    mumps->id.ICNTL(21) = 1;   /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur    = 0;
  mumps->id.listvar_schur = NULL;
  mumps->id.schur         = NULL;
  mumps->sizeredrhs       = 0;
  mumps->schur_sol        = NULL;
  mumps->schur_sizesol    = 0;
  PetscFunctionReturn(0);
}
1910 
1911 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1912 {
1913   PetscErrorCode ierr;
1914 
1915   PetscFunctionBegin;
1916   if (mumps->id.INFOG(1) < 0) {
1917     if (A->erroriffailure) {
1918       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1919     } else {
1920       if (mumps->id.INFOG(1) == -6) {
1921         ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1922         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1923       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1924         ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1925         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1926       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
1927         ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
1928       } else {
1929         ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1930         F->factorerrortype = MAT_FACTOR_OTHER;
1931       }
1932     }
1933   }
1934   PetscFunctionReturn(0);
1935 }
1936 
/* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
/*
  MatLUFactorSymbolic_AIJMUMPS - MUMPS analysis phase (JOB_FACTSYMBOLIC) for AIJ matrices:
  reads options, converts A to COO triples, hands the centralized or distributed triples to
  MUMPS depending on ICNTL(18), calls MUMPS, and installs the numeric/solve function pointers.
*/
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  /* Build the COO triples and, with PETSc OpenMP support, collect them on the omp master */
  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6)>1) mumps->id.a = (MumpsScalar*)mumps->val;
      if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */
        /*
        PetscBool      flag;
        ierr = ISEqual(r,c,&flag);CHKERRQ(ierr);
        if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm");
        ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF);
         */
        /* NOTE(review): this rank-0 check is redundant — already inside "if (!mumps->myid)" above */
        if (!mumps->myid) {
          const PetscInt *idx;
          PetscInt       i;

          ierr = PetscMalloc1(M,&mumps->id.perm_in);CHKERRQ(ierr);
          ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);
          for (i=0; i<M; i++) {ierr = PetscMUMPSIntCast(idx[i]+1,&(mumps->id.perm_in[i]));CHKERRQ(ierr);} /* perm_in[]: start from 1, not 0! */
          ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
        }
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6)>1) mumps->id.a_loc = (MumpsScalar*)mumps->val;
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  F->ops->matsolve        = MatMatSolve_MUMPS;
  F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
  PetscFunctionReturn(0);
}
2005 
/* Note the Petsc r and c permutations are ignored */
/*
  MatLUFactorSymbolic_BAIJMUMPS - MUMPS analysis phase (JOB_FACTSYMBOLIC) for BAIJ matrices.
  Same flow as the AIJ variant but without PERM_IN support and without MatMatSolve hooks.
*/
PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  /* Build the COO triples and, with PETSc OpenMP support, collect them on the omp master */
  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nnz = mumps->nnz;
      mumps->id.irn = mumps->irn;
      mumps->id.jcn = mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nnz_loc = mumps->nnz;
    mumps->id.irn_loc = mumps->irn;
    mumps->id.jcn_loc = mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve           = MatSolve_MUMPS;
  F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
  PetscFunctionReturn(0);
}
2059 
2060 /* Note the Petsc r permutation and factor info are ignored */
2061 PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
2062 {
2063   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
2064   PetscErrorCode ierr;
2065   Vec            b;
2066   const PetscInt M = A->rmap->N;
2067 
2068   PetscFunctionBegin;
2069   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
2070 
2071   /* Set MUMPS options from the options database */
2072   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
2073 
2074   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps);CHKERRQ(ierr);
2075   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
2076 
2077   /* analysis phase */
2078   /*----------------*/
2079   mumps->id.job = JOB_FACTSYMBOLIC;
2080   mumps->id.n   = M;
2081   switch (mumps->id.ICNTL(18)) {
2082   case 0:  /* centralized assembled matrix input */
2083     if (!mumps->myid) {
2084       mumps->id.nnz = mumps->nnz;
2085       mumps->id.irn = mumps->irn;
2086       mumps->id.jcn = mumps->jcn;
2087       if (mumps->id.ICNTL(6)>1) {
2088         mumps->id.a = (MumpsScalar*)mumps->val;
2089       }
2090     }
2091     break;
2092   case 3:  /* distributed assembled matrix input (size>1) */
2093     mumps->id.nnz_loc = mumps->nnz;
2094     mumps->id.irn_loc = mumps->irn;
2095     mumps->id.jcn_loc = mumps->jcn;
2096     if (mumps->id.ICNTL(6)>1) {
2097       mumps->id.a_loc = (MumpsScalar*)mumps->val;
2098     }
2099     /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
2100     ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
2101     ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
2102     ierr = VecDestroy(&b);CHKERRQ(ierr);
2103     break;
2104   }
2105   PetscMUMPS_c(mumps);
2106   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
2107 
2108   F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
2109   F->ops->solve                 = MatSolve_MUMPS;
2110   F->ops->solvetranspose        = MatSolve_MUMPS;
2111   F->ops->matsolve              = MatMatSolve_MUMPS;
2112   F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
2113 #if defined(PETSC_USE_COMPLEX)
2114   F->ops->getinertia = NULL;
2115 #else
2116   F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
2117 #endif
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
2122 {
2123   PetscErrorCode    ierr;
2124   PetscBool         iascii;
2125   PetscViewerFormat format;
2126   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
2127 
2128   PetscFunctionBegin;
2129   /* check if matrix is mumps type */
2130   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
2131 
2132   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
2133   if (iascii) {
2134     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
2135     if (format == PETSC_VIEWER_ASCII_INFO) {
2136       ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
2137       ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d \n",mumps->id.sym);CHKERRQ(ierr);
2138       ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d \n",mumps->id.par);CHKERRQ(ierr);
2139       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr);
2140       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr);
2141       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr);
2142       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr);
2143       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr);
2144       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr);
2145       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr);
2146       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr);
2147       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr);
2148       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr);
2149       if (mumps->id.ICNTL(11)>0) {
2150         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
2151         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
2152         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
2153         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
2154         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
2155         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
2156       }
2157       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr);
2158       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (sequential factorization of the root node):  %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr);
2159       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr);
2160       /* ICNTL(15-17) not used */
2161       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr);
2162       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr);
2163       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (RHS sparse pattern):                         %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr);
2164       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr);
2165       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr);
2166       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr);
2167 
2168       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr);
2169       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr);
2170       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for RHS or solution):          %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr);
2171       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (blocking size for multiple RHS):             %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr);
2172       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr);
2173       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr);
2174 
2175       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr);
2176       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors is discarded in the solve phase):    %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr);
2177       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr);
2178       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr);
2179       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr);
2180       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr);
2181 
2182       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
2183       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
2184       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
2185       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
2186       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
2187       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);
2188 
2189       /* infomation local to each processor */
2190       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
2191       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
2192       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
2193       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2194       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
2195       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
2196       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2197       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
2198       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
2199       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2200 
2201       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
2202       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
2203       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2204 
2205       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
2206       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
2207       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2208 
2209       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
2210       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
2211       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2212 
2213       if (mumps->ninfo && mumps->ninfo <= 80){
2214         PetscInt i;
2215         for (i=0; i<mumps->ninfo; i++){
2216           ierr = PetscViewerASCIIPrintf(viewer, "  INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
2217           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
2218           ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
2219         }
2220       }
2221       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
2222 
2223       if (!mumps->myid) { /* information from the host */
2224         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
2225         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
2226         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
2227         ierr = PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);
2228 
2229         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr);
2230         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr);
2231         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr);
2232         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr);
2233         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr);
2234         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr);
2235         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr);
2236         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr);
2237         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr);
2238         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr);
2239         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr);
2240         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr);
2241         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr);
2242         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr);
2243         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr);
2244         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr);
2245         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr);
2246         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr);
2247         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr);
2248         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr);
2249         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr);
2250         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr);
2251         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr);
2252         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
2253         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
2254         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
2255         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
2256         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
2257         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
2258         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
2259         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr);
2260         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr);
2261         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr);
2262         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr);
2263       }
2264     }
2265   }
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
2270 {
2271   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
2272 
2273   PetscFunctionBegin;
2274   info->block_size        = 1.0;
2275   info->nz_allocated      = mumps->id.INFOG(20);
2276   info->nz_used           = mumps->id.INFOG(20);
2277   info->nz_unneeded       = 0.0;
2278   info->assemblies        = 0.0;
2279   info->mallocs           = 0.0;
2280   info->memory            = 0.0;
2281   info->fill_ratio_given  = 0;
2282   info->fill_ratio_needed = 0;
2283   info->factor_mallocs    = 0;
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 /* -------------------------------------------------------------------------------------------*/
/* Select the variables of the Schur complement via an index set and configure
   MUMPS (ICNTL(19), listvar_schur) to return the Schur complement on the host.
   In parallel, only the root rank may own a non-empty IS. */
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
  }

  /* Schur complement matrix: a size x size sequential dense matrix whose buffer
     MUMPS fills during factorization (hence the cast away from const below) */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  mumps->id.schur      = (MumpsScalar*)arr;
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size; /* leading dimension equals the Schur size (dense square storage) */
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  for (i=0; i<size; i++) {ierr = PetscMUMPSIntCast(idxs[i]+1,&(mumps->id.listvar_schur[i]));CHKERRQ(ierr);}
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}
2337 
2338 /* -------------------------------------------------------------------------------------------*/
/* Copy the Schur complement computed by MUMPS (mumps->id.schur) into a new
   column-major SeqDense matrix *S, converting from whichever storage MUMPS used
   (selected by ICNTL(19) in MatFactorSetSchurIS_MUMPS). Caller owns *S. */
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit, used to rebuild PetscScalar from MUMPS (r,i) pairs */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows: transpose into PETSc's column-major array */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val;
        }
      }
    } else { /* stored by columns: layouts match, copy directly */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed); symmetrize the triangular cases */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val; /* mirror each entry to both triangles */
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val; /* mirror each entry to both triangles */
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St;
  PetscFunctionReturn(0);
}
2407 
2408 /* -------------------------------------------------------------------------------------------*/
/* Type-specific implementation behind MatMumpsSetIcntl(): stores ival into ICNTL(icntl). */
PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  /* PetscMUMPSIntCast errors out if ival does not fit in the (32-bit) MUMPS_INT */
  ierr = PetscMUMPSIntCast(ival,&mumps->id.ICNTL(icntl));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2418 
/* Type-specific implementation behind MatMumpsGetIcntl(): reads the stored ICNTL(icntl). */
PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *ival = mumps->id.ICNTL(icntl); /* note: icntl is not range-checked here */
  PetscFunctionReturn(0);
}
2427 
2428 /*@
2429   MatMumpsSetIcntl - Set MUMPS parameter ICNTL()
2430 
2431    Logically Collective on Mat
2432 
2433    Input Parameters:
2434 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2435 .  icntl - index of MUMPS parameter array ICNTL()
2436 -  ival - value of MUMPS ICNTL(icntl)
2437 
2438   Options Database:
2439 .   -mat_mumps_icntl_<icntl> <ival>
2440 
2441    Level: beginner
2442 
2443    References:
2444 .     MUMPS Users' Guide
2445 
2446 .seealso: MatGetFactor(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2447  @*/
2448 PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
2449 {
2450   PetscErrorCode ierr;
2451 
2452   PetscFunctionBegin;
2453   PetscValidType(F,1);
2454   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2455   PetscValidLogicalCollectiveInt(F,icntl,2);
2456   PetscValidLogicalCollectiveInt(F,ival,3);
2457   ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 /*@
2462   MatMumpsGetIcntl - Get MUMPS parameter ICNTL()
2463 
2464    Logically Collective on Mat
2465 
2466    Input Parameters:
2467 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2468 -  icntl - index of MUMPS parameter array ICNTL()
2469 
2470   Output Parameter:
2471 .  ival - value of MUMPS ICNTL(icntl)
2472 
2473    Level: beginner
2474 
2475    References:
2476 .     MUMPS Users' Guide
2477 
2478 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2479 @*/
2480 PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
2481 {
2482   PetscErrorCode ierr;
2483 
2484   PetscFunctionBegin;
2485   PetscValidType(F,1);
2486   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2487   PetscValidLogicalCollectiveInt(F,icntl,2);
2488   PetscValidIntPointer(ival,3);
2489   ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2490   PetscFunctionReturn(0);
2491 }
2492 
2493 /* -------------------------------------------------------------------------------------------*/
/* Type-specific implementation behind MatMumpsSetCntl(): stores val into CNTL(icntl).
   CNTL entries are real-valued; PetscReal matches the MUMPS precision chosen at
   build time, so a direct assignment suffices (no cast helper needed). */
PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  mumps->id.CNTL(icntl) = val;
  PetscFunctionReturn(0);
}
2502 
/* Type-specific implementation behind MatMumpsGetCntl(): reads the stored CNTL(icntl). */
PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
{
  Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;

  PetscFunctionBegin;
  *val = mumps->id.CNTL(icntl); /* note: icntl is not range-checked here */
  PetscFunctionReturn(0);
}
2511 
2512 /*@
2513   MatMumpsSetCntl - Set MUMPS parameter CNTL()
2514 
2515    Logically Collective on Mat
2516 
2517    Input Parameters:
2518 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2519 .  icntl - index of MUMPS parameter array CNTL()
2520 -  val - value of MUMPS CNTL(icntl)
2521 
2522   Options Database:
2523 .   -mat_mumps_cntl_<icntl> <val>
2524 
2525    Level: beginner
2526 
2527    References:
2528 .     MUMPS Users' Guide
2529 
2530 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2531 @*/
2532 PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
2533 {
2534   PetscErrorCode ierr;
2535 
2536   PetscFunctionBegin;
2537   PetscValidType(F,1);
2538   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2539   PetscValidLogicalCollectiveInt(F,icntl,2);
2540   PetscValidLogicalCollectiveReal(F,val,3);
2541   ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 /*@
2546   MatMumpsGetCntl - Get MUMPS parameter CNTL()
2547 
2548    Logically Collective on Mat
2549 
2550    Input Parameters:
2551 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2552 -  icntl - index of MUMPS parameter array CNTL()
2553 
2554   Output Parameter:
2555 .  val - value of MUMPS CNTL(icntl)
2556 
2557    Level: beginner
2558 
2559    References:
2560 .      MUMPS Users' Guide
2561 
2562 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2563 @*/
2564 PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
2565 {
2566   PetscErrorCode ierr;
2567 
2568   PetscFunctionBegin;
2569   PetscValidType(F,1);
2570   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2571   PetscValidLogicalCollectiveInt(F,icntl,2);
2572   PetscValidRealPointer(val,3);
2573   ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2578 {
2579   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2580 
2581   PetscFunctionBegin;
2582   *info = mumps->id.INFO(icntl);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2587 {
2588   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2589 
2590   PetscFunctionBegin;
2591   *infog = mumps->id.INFOG(icntl);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2596 {
2597   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2598 
2599   PetscFunctionBegin;
2600   *rinfo = mumps->id.RINFO(icntl);
2601   PetscFunctionReturn(0);
2602 }
2603 
2604 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2605 {
2606   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2607 
2608   PetscFunctionBegin;
2609   *rinfog = mumps->id.RINFOG(icntl);
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2614 {
2615   PetscErrorCode ierr;
2616   Mat            Bt = NULL,Btseq = NULL;
2617   PetscBool      flg;
2618   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2619   PetscScalar    *aa;
2620   PetscInt       spnr,*ia,*ja,M,nrhs;
2621 
2622   PetscFunctionBegin;
2623   PetscValidIntPointer(spRHS,2);
2624   ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
2625   if (flg) {
2626     ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
2627   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix");
2628 
2629   ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);
2630 
2631   if (mumps->petsc_size > 1) {
2632     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2633     Btseq = b->A;
2634   } else {
2635     Btseq = Bt;
2636   }
2637 
2638   ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
2639   mumps->id.nrhs = nrhs;
2640   mumps->id.lrhs = M;
2641   mumps->id.rhs  = NULL;
2642 
2643   if (!mumps->myid) {
2644     ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
2645     ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2646     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2647     ierr = PetscMUMPSIntCSRCast(mumps,spnr,ia,ja,&mumps->id.irhs_ptr,&mumps->id.irhs_sparse,&mumps->id.nz_rhs);CHKERRQ(ierr);
2648     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2649   } else {
2650     mumps->id.irhs_ptr    = NULL;
2651     mumps->id.irhs_sparse = NULL;
2652     mumps->id.nz_rhs      = 0;
2653     mumps->id.rhs_sparse  = NULL;
2654   }
2655   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2656   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2657 
2658   /* solve phase */
2659   /*-------------*/
2660   mumps->id.job = JOB_SOLVE;
2661   PetscMUMPS_c(mumps);
2662   if (mumps->id.INFOG(1) < 0)
2663     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
2664 
2665   if (!mumps->myid) {
2666     ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
2667     ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2668     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2669   }
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 /*@
2674   MatMumpsGetInverse - Get user-specified set of entries in inverse of A
2675 
2676    Logically Collective on Mat
2677 
2678    Input Parameters:
2679 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2680 -  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]
2681 
2682   Output Parameter:
2683 . spRHS - requested entries of inverse of A
2684 
2685    Level: beginner
2686 
2687    References:
2688 .      MUMPS Users' Guide
2689 
2690 .seealso: MatGetFactor(), MatCreateTranspose()
2691 @*/
2692 PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
2693 {
2694   PetscErrorCode ierr;
2695 
2696   PetscFunctionBegin;
2697   PetscValidType(F,1);
2698   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2699   ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2704 {
2705   PetscErrorCode ierr;
2706   Mat            spRHS;
2707 
2708   PetscFunctionBegin;
2709   ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
2710   ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
2711   ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
2712   PetscFunctionReturn(0);
2713 }
2714 
2715 /*@
2716   MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T
2717 
2718    Logically Collective on Mat
2719 
2720    Input Parameters:
2721 +  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
2722 -  spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]
2723 
2724   Output Parameter:
2725 . spRHST - requested entries of inverse of A^T
2726 
2727    Level: beginner
2728 
2729    References:
2730 .      MUMPS Users' Guide
2731 
2732 .seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
2733 @*/
2734 PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
2735 {
2736   PetscErrorCode ierr;
2737   PetscBool      flg;
2738 
2739   PetscFunctionBegin;
2740   PetscValidType(F,1);
2741   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2742   ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
2743   if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");
2744 
2745   ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 /*@
2750   MatMumpsGetInfo - Get MUMPS parameter INFO()
2751 
2752    Logically Collective on Mat
2753 
2754    Input Parameters:
2755 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2756 -  icntl - index of MUMPS parameter array INFO()
2757 
2758   Output Parameter:
2759 .  ival - value of MUMPS INFO(icntl)
2760 
2761    Level: beginner
2762 
2763    References:
2764 .      MUMPS Users' Guide
2765 
2766 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2767 @*/
2768 PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
2769 {
2770   PetscErrorCode ierr;
2771 
2772   PetscFunctionBegin;
2773   PetscValidType(F,1);
2774   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2775   PetscValidIntPointer(ival,3);
2776   ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 /*@
2781   MatMumpsGetInfog - Get MUMPS parameter INFOG()
2782 
2783    Logically Collective on Mat
2784 
2785    Input Parameters:
2786 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2787 -  icntl - index of MUMPS parameter array INFOG()
2788 
2789   Output Parameter:
2790 .  ival - value of MUMPS INFOG(icntl)
2791 
2792    Level: beginner
2793 
2794    References:
2795 .      MUMPS Users' Guide
2796 
2797 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
2798 @*/
2799 PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
2800 {
2801   PetscErrorCode ierr;
2802 
2803   PetscFunctionBegin;
2804   PetscValidType(F,1);
2805   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2806   PetscValidIntPointer(ival,3);
2807   ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
2808   PetscFunctionReturn(0);
2809 }
2810 
2811 /*@
2812   MatMumpsGetRinfo - Get MUMPS parameter RINFO()
2813 
2814    Logically Collective on Mat
2815 
2816    Input Parameters:
2817 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2818 -  icntl - index of MUMPS parameter array RINFO()
2819 
2820   Output Parameter:
2821 .  val - value of MUMPS RINFO(icntl)
2822 
2823    Level: beginner
2824 
2825    References:
2826 .       MUMPS Users' Guide
2827 
2828 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfog()
2829 @*/
2830 PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
2831 {
2832   PetscErrorCode ierr;
2833 
2834   PetscFunctionBegin;
2835   PetscValidType(F,1);
2836   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2837   PetscValidRealPointer(val,3);
2838   ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2839   PetscFunctionReturn(0);
2840 }
2841 
2842 /*@
2843   MatMumpsGetRinfog - Get MUMPS parameter RINFOG()
2844 
2845    Logically Collective on Mat
2846 
2847    Input Parameters:
2848 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
2849 -  icntl - index of MUMPS parameter array RINFOG()
2850 
2851   Output Parameter:
2852 .  val - value of MUMPS RINFOG(icntl)
2853 
2854    Level: beginner
2855 
2856    References:
2857 .      MUMPS Users' Guide
2858 
2859 .seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo()
2860 @*/
2861 PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
2862 {
2863   PetscErrorCode ierr;
2864 
2865   PetscFunctionBegin;
2866   PetscValidType(F,1);
2867   if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
2868   PetscValidRealPointer(val,3);
2869   ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
2870   PetscFunctionReturn(0);
2871 }
2872 
2873 /*MC
2874   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2875   distributed and sequential matrices via the external package MUMPS.
2876 
2877   Works with MATAIJ and MATSBAIJ matrices
2878 
2879   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2880 
2881   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2882 
2883   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2884 
2885   Options Database Keys:
2886 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2887 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2888 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2889 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2890 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2891 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis
2892 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2893 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2894 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2895 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2896 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2897 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2898 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2899 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
2900 .  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that can allocate per processor
2901 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2902 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
.  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2905 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2906 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2907 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2908 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2909 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2910 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2911 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2912 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2913 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2914 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2915 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2916 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2917 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2918 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2919                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2920 
2921   Level: beginner
2922 
2923     Notes:
2924     MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
2925 
2926     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2927 $          KSPGetPC(ksp,&pc);
2928 $          PCFactorGetMatrix(pc,&mat);
2929 $          MatMumpsGetInfo(mat,....);
2930 $          MatMumpsGetInfog(mat,....); etc.
2931            Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2932 
2933    Two modes to run MUMPS/PETSc with OpenMP
2934 
2935 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2936 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2937 
2938 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2939 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2940 
2941    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2942    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2943    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2944    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2946 
2947    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2948    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2949    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2950    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
2951    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2952    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
2953    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
2954    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
2955    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
2956    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2957    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2958    examine the mapping result.
2959 
2960    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2961    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2962    calls omp_set_num_threads(m) internally before calling MUMPS.
2963 
2964    References:
2965 +   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2966 -   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2967 
2968 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()
2969 
2970 M*/
2971 
/* Reports the solver package name for a MUMPS factor; A is unused but required by the callback signature */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}
2978 
2979 /* MatGetFactor for Seq and MPI AIJ matrices */
/* Creates the factor matrix B wrapping this AIJ matrix A for LU or Cholesky with MUMPS.
   Registers the MatMumps{Set,Get}* methods on B and initializes the MUMPS instance. */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  /* MUMPS does not support complex Hermitian (non-symmetric) Cholesky */
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Expose the MUMPS-specific API (ICNTL/CNTL/INFO accessors, Schur support, inverse entries) on B */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2; /* SYM=2: general symmetric (SPD path not used with complex scalars) */
#else
    if (A->spd_set && A->spd) mumps->sym = 1; /* SYM=1: symmetric positive definite */
    else                      mumps->sym = 2; /* SYM=2: general symmetric */
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3048 
3049 /* MatGetFactor for Seq and MPI SBAIJ matrices */
/* Creates the factor matrix B wrapping this SBAIJ matrix A for Cholesky with MUMPS.
   Note: ftype is not checked here; this factory is registered only for MAT_FACTOR_CHOLESKY. */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  /* MUMPS does not support complex Hermitian (non-symmetric) Cholesky */
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* Expose the MUMPS-specific API (ICNTL/CNTL/INFO accessors, Schur support, inverse entries) on B */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2; /* SYM=2: general symmetric (SPD path not used with complex scalars) */
#else
  if (A->spd_set && A->spd) mumps->sym = 1; /* SYM=1: symmetric positive definite */
  else                      mumps->sym = 2; /* SYM=2: general symmetric */
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3112 
/* Creates the factor matrix B wrapping this BAIJ matrix A for LU with MUMPS;
   Cholesky on BAIJ is rejected (callers should use SBAIJ or AIJ). */
static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqBAIJ;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");

  B->ops->view        = MatView_MUMPS;
  B->ops->getinfo     = MatGetInfo_MUMPS;

  /* Expose the MUMPS-specific API (ICNTL/CNTL/INFO accessors, Schur support, inverse entries) on B */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3166 
3167 /* MatGetFactor for Seq and MPI SELL matrices */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view        = MatView_MUMPS;
  B->ops->getinfo     = MatGetInfo_MUMPS;

  /* Expose the MUMPS-specific API on B (no Schur-inverse methods here, unlike the AIJ/BAIJ/SBAIJ factories) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0; /* MUMPS SYM=0: unsymmetric */
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
3220 
3221 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
3222 {
3223   PetscErrorCode ierr;
3224 
3225   PetscFunctionBegin;
3226   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3227   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3228   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3229   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3230   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3231   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3232   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
3233   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3234   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
3235   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
3236   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
3237   PetscFunctionReturn(0);
3238 }
3239 
3240