xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 257f4e5aa63fce47b93517cb665016a4c8ee7c55)
1 
2 /*
3     Provides an interface to the MUMPS sparse solver
4 */
5 
6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I  "petscmat.h"  I*/
7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
8 #include <../src/mat/impls/sell/mpi/mpisell.h>
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #if defined(PETSC_USE_REAL_SINGLE)
13 #include <cmumps_c.h>
14 #else
15 #include <zmumps_c.h>
16 #endif
17 #else
18 #if defined(PETSC_USE_REAL_SINGLE)
19 #include <smumps_c.h>
20 #else
21 #include <dmumps_c.h>
22 #endif
23 #endif
24 EXTERN_C_END
25 #define JOB_INIT -1
26 #define JOB_FACTSYMBOLIC 1
27 #define JOB_FACTNUMERIC 2
28 #define JOB_SOLVE 3
29 #define JOB_END -2
30 
31 /* calls to MUMPS */
32 #if defined(PETSC_USE_COMPLEX)
33 #if defined(PETSC_USE_REAL_SINGLE)
34 #define MUMPS_c cmumps_c
35 #else
36 #define MUMPS_c zmumps_c
37 #endif
38 #else
39 #if defined(PETSC_USE_REAL_SINGLE)
40 #define MUMPS_c smumps_c
41 #else
42 #define MUMPS_c dmumps_c
43 #endif
44 #endif
45 
46 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */
47 #if defined(PETSC_HAVE_OPENMP_SUPPORT)
48 #define PetscMUMPS_c(mumps) \
49   do { \
50     if (mumps->use_petsc_omp_support) { \
51       if (mumps->is_omp_master) { \
52         ierr = PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl);CHKERRQ(ierr); \
53         MUMPS_c(&mumps->id); \
54         ierr = PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl);CHKERRQ(ierr); \
55       } \
56       ierr = PetscOmpCtrlBarrier(mumps->omp_ctrl);CHKERRQ(ierr); \
57       /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific      \
58          to processes, so we only Bcast info[1], an error code and leave others (since they do not have   \
59          an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82.                   \
60          omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \
61       */ \
62       ierr = MPI_Bcast(mumps->id.infog, 40,MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr);  \
63       ierr = MPI_Bcast(mumps->id.rinfog,20,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr); \
64       ierr = MPI_Bcast(mumps->id.info,  1, MPI_INT, 0,mumps->omp_comm);CHKERRQ(ierr);  \
65     } else { \
66       MUMPS_c(&mumps->id); \
67     } \
68   } while(0)
69 #else
70 #define PetscMUMPS_c(mumps) \
71   do { MUMPS_c(&mumps->id); } while (0)
72 #endif
73 
74 /* declare MumpsScalar */
75 #if defined(PETSC_USE_COMPLEX)
76 #if defined(PETSC_USE_REAL_SINGLE)
77 #define MumpsScalar mumps_complex
78 #else
79 #define MumpsScalar mumps_double_complex
80 #endif
81 #else
82 #define MumpsScalar PetscScalar
83 #endif
84 
85 /* macros s.t. indices match MUMPS documentation */
86 #define ICNTL(I) icntl[(I)-1]
87 #define CNTL(I) cntl[(I)-1]
88 #define INFOG(I) infog[(I)-1]
89 #define INFO(I) info[(I)-1]
90 #define RINFOG(I) rinfog[(I)-1]
91 #define RINFO(I) rinfo[(I)-1]
92 
/* Per-factor context carried in Mat->data for a MUMPS factored matrix.
   Holds the MUMPS control structure plus the PETSc-side buffers used to
   feed the matrix (as COO triples) and the right-hand sides to MUMPS. */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
#if defined(PETSC_USE_REAL_SINGLE)
  CMUMPS_STRUC_C id;
#else
  ZMUMPS_STRUC_C id;
#endif
#else
#if defined(PETSC_USE_REAL_SINGLE)
  SMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
#endif

  MatStructure matstruc;             /* SAME_NONZERO_PATTERN vs DIFFERENT_NONZERO_PATTERN between factorizations */
  PetscMPIInt  myid,petsc_size;      /* rank and size in petsc_comm */
  PetscInt     *irn,*jcn,nz,sym;     /* COO triples (row/col indices, count) handed to MUMPS; sym is the MUMPS symmetry flag */
  PetscScalar  *val;                 /* values of the COO triples */
  MPI_Comm     mumps_comm;           /* communicator actually passed to MUMPS */
  PetscInt     ICNTL9_pre;           /* check if ICNTL(9) is changed from previous MatSolve */
  VecScatter   scat_rhs, scat_sol;   /* used by MatSolve() */
  Vec          b_seq,x_seq;          /* sequential work vectors for gathering rhs/solution */
  PetscInt     ninfo,*info;          /* display INFO */
  PetscInt     sizeredrhs;           /* allocated size of id.redrhs (reduced rhs for Schur complement) */
  PetscScalar  *schur_sol;           /* scratch solution buffer used when the Schur complement is stored inverted */
  PetscInt     schur_sizesol;        /* allocated size of schur_sol */

  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller so that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm,omp_comm;  /* petsc_comm is petsc matrix's comm */
  PetscMPIInt  mpinz;                /* on master rank, nz = sum(mpinz) over omp_comm; on other ranks, mpinz = nz*/
  PetscMPIInt  omp_comm_size;
  PetscBool    is_omp_master;        /* is this rank the master of omp_comm */
  PetscMPIInt  *recvcount,*displs;   /* gather counts/displacements used when funneling data to omp masters */

  /* format-specific converter that produces the COO triples MUMPS consumes */
  PetscErrorCode (*ConvertToTriples)(Mat, int, MatReuse, int*, int**, int**, PetscScalar**);
} Mat_MUMPS;
131 
132 extern PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
133 
134 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS* mumps)
135 {
136   PetscErrorCode ierr;
137 
138   PetscFunctionBegin;
139   ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
140   ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
141   ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
142   mumps->id.size_schur = 0;
143   mumps->id.schur_lld  = 0;
144   mumps->id.ICNTL(19)  = 0;
145   PetscFunctionReturn(0);
146 }
147 
/* solve with rhs in mumps->id.redrhs and return in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps=(Mat_MUMPS*)F->data;
  Mat                  S,B,X;
  MatFactorSchurStatus schurstatus;
  PetscInt             sizesol;
  PetscErrorCode       ierr;

  PetscFunctionBegin;
  ierr = MatFactorFactorizeSchurComplement(F);CHKERRQ(ierr);
  ierr = MatFactorGetSchurComplement(F,&S,&schurstatus);CHKERRQ(ierr);
  /* B wraps the MUMPS reduced-rhs buffer (size_schur x nrhs) without copying */
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&B);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  ierr = MatBindToCPU(B,S->boundtocpu);CHKERRQ(ierr);
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    /* X aliases the same redrhs buffer as B, so the solve writes the solution
       back in place, satisfying this function's contract */
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,(PetscScalar*)mumps->id.redrhs,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatMatSolveTranspose(S,B,X);CHKERRQ(ierr);
    } else {
      ierr = MatMatSolve(S,B,X);CHKERRQ(ierr);
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    /* S holds the inverse: solution is a mat-mat product into a scratch
       buffer that is grown lazily and cached in mumps->schur_sol */
    sizesol = mumps->id.nrhs*mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      ierr = PetscFree(mumps->schur_sol);CHKERRQ(ierr);
      ierr = PetscMalloc1(sizesol,&mumps->schur_sol);CHKERRQ(ierr);
      mumps->schur_sizesol = sizesol;
    }
    ierr = MatCreateSeqDense(PETSC_COMM_SELF,mumps->id.size_schur,mumps->id.nrhs,mumps->schur_sol,&X);CHKERRQ(ierr);
    ierr = MatSetType(X,((PetscObject)S)->type_name);CHKERRQ(ierr);
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    ierr = MatBindToCPU(X,S->boundtocpu);CHKERRQ(ierr);
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      ierr = MatTransposeMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    } else {
      ierr = MatMatMult(S,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&X);CHKERRQ(ierr);
    }
    /* copy the result back into redrhs through B, which wraps it */
    ierr = MatCopy(X,B,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
    break;
  default:
    SETERRQ1(PetscObjectComm((PetscObject)F),PETSC_ERR_SUP,"Unhandled MatFactorSchurStatus %D",F->schur_status);
    break;
  }
  ierr = MatFactorRestoreSchurComplement(F,&S,schurstatus);CHKERRQ(ierr);
  ierr = MatDestroy(&B);CHKERRQ(ierr);
  ierr = MatDestroy(&X);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
206 
/* Drive the MUMPS Schur reduction/expansion around a solve.
   expansion == PETSC_FALSE: prepare the condensation step (allocate redrhs, set ICNTL(26)=1).
   expansion == PETSC_TRUE:  solve the Schur system ourselves, then run the MUMPS
   expansion phase (ICNTL(26)=2) and restore defaults. No-op if no Schur complement
   was requested (ICNTL(19)==0). */
static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS     *mumps=(Mat_MUMPS*)F->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */
    PetscFunctionReturn(0);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs*mumps->id.size_schur;
    /* allocate MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = mumps->id.size_schur;
      ierr = PetscMalloc1(mumps->id.nrhs*mumps->id.lredrhs,&mumps->id.redrhs);CHKERRQ(ierr);
      mumps->sizeredrhs = mumps->id.nrhs*mumps->id.lredrhs;
    }
    mumps->id.ICNTL(26) = 1; /* condensation phase */
  } else { /* prepare for the expansion step */
    /* solve Schur complement (this has to be done by the MUMPS user, so basically us) */
    ierr = MatMumpsSolveSchur_Private(F);CHKERRQ(ierr);
    mumps->id.ICNTL(26) = 2; /* expansion phase */
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */
    if (mumps->id.nrhs > 1) {
      ierr = PetscFree(mumps->id.redrhs);CHKERRQ(ierr);
      mumps->id.lredrhs = 0;
      mumps->sizeredrhs = 0;
    }
  }
  PetscFunctionReturn(0);
}
243 
244 /*
245   MatConvertToTriples_A_B - convert Petsc matrix to triples: row[nz], col[nz], val[nz]
246 
247   input:
248     A       - matrix in aij,baij or sbaij format
249     shift   - 0: C style output triple; 1: Fortran style output triple.
250     reuse   - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple
251               MAT_REUSE_MATRIX:   only the values in v array are updated
252   output:
253     nnz     - dim of r, c, and v (number of local nonzero entries of A)
254     r, c, v - row and col index, matrix values (matrix triples)
255 
256   The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is
257   freed with PetscFree(mumps->irn);  This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means
258   that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
259 
260  */
261 
262 PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
263 {
264   const PetscScalar *av;
265   const PetscInt    *ai,*aj,*ajj,M=A->rmap->n;
266   PetscInt          nz,rnz,i,j;
267   PetscErrorCode    ierr;
268   PetscInt          *row,*col;
269   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
270 
271   PetscFunctionBegin;
272   ierr = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
273   *v   = (PetscScalar*)av;
274   if (reuse == MAT_INITIAL_MATRIX) {
275     nz   = aa->nz;
276     ai   = aa->i;
277     aj   = aa->j;
278     *nnz = nz;
279     ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
280     col  = row + nz;
281 
282     nz = 0;
283     for (i=0; i<M; i++) {
284       rnz = ai[i+1] - ai[i];
285       ajj = aj + ai[i];
286       for (j=0; j<rnz; j++) {
287         row[nz] = i+shift; col[nz++] = ajj[j] + shift;
288       }
289     }
290     *r = row; *c = col;
291   }
292   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
293   PetscFunctionReturn(0);
294 }
295 
296 PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
297 {
298   Mat_SeqSELL *a=(Mat_SeqSELL*)A->data;
299   PetscInt    *ptr;
300 
301   PetscFunctionBegin;
302   *v = a->val;
303   if (reuse == MAT_INITIAL_MATRIX) {
304     PetscInt       nz,i,j,row;
305     PetscErrorCode ierr;
306 
307     nz   = a->sliidx[a->totalslices];
308     *nnz = nz;
309     ierr = PetscMalloc1(2*nz, &ptr);CHKERRQ(ierr);
310     *r   = ptr;
311     *c   = ptr + nz;
312 
313     for (i=0; i<a->totalslices; i++) {
314       for (j=a->sliidx[i],row=0; j<a->sliidx[i+1]; j++,row=((row+1)&0x07)) {
315         *ptr++ = 8*i + row + shift;
316       }
317     }
318     for (i=0;i<nz;i++) *ptr++ = a->colidx[i] + shift;
319   }
320   PetscFunctionReturn(0);
321 }
322 
323 PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
324 {
325   Mat_SeqBAIJ    *aa=(Mat_SeqBAIJ*)A->data;
326   const PetscInt *ai,*aj,*ajj,bs2 = aa->bs2;
327   PetscInt       bs,M,nz,idx=0,rnz,i,j,k,m;
328   PetscErrorCode ierr;
329   PetscInt       *row,*col;
330 
331   PetscFunctionBegin;
332   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
333   M = A->rmap->N/bs;
334   *v = aa->a;
335   if (reuse == MAT_INITIAL_MATRIX) {
336     ai   = aa->i; aj = aa->j;
337     nz   = bs2*aa->nz;
338     *nnz = nz;
339     ierr = PetscMalloc1(2*nz, &row);CHKERRQ(ierr);
340     col  = row + nz;
341 
342     for (i=0; i<M; i++) {
343       ajj = aj + ai[i];
344       rnz = ai[i+1] - ai[i];
345       for (k=0; k<rnz; k++) {
346         for (j=0; j<bs; j++) {
347           for (m=0; m<bs; m++) {
348             row[idx]   = i*bs + m + shift;
349             col[idx++] = bs*(ajj[k]) + j + shift;
350           }
351         }
352       }
353     }
354     *r = row; *c = col;
355   }
356   PetscFunctionReturn(0);
357 }
358 
359 PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c,PetscScalar **v)
360 {
361   const PetscInt *ai, *aj,*ajj;
362   PetscInt        nz,rnz,i,j,k,m,bs;
363   PetscErrorCode  ierr;
364   PetscInt        *row,*col;
365   PetscScalar     *val;
366   Mat_SeqSBAIJ    *aa=(Mat_SeqSBAIJ*)A->data;
367   const PetscInt  bs2=aa->bs2,mbs=aa->mbs;
368 #if defined(PETSC_USE_COMPLEX)
369   PetscBool      hermitian;
370 #endif
371 
372   PetscFunctionBegin;
373 #if defined(PETSC_USE_COMPLEX)
374   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
375   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
376 #endif
377   ai   = aa->i;
378   aj   = aa->j;
379   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
380   if (reuse == MAT_INITIAL_MATRIX) {
381     nz   = aa->nz;
382     ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+(bs>1?bs2*nz*sizeof(PetscScalar):0)), &row);CHKERRQ(ierr);
383     col  = row + bs2*nz;
384     if (bs>1) val = (PetscScalar*)(col + bs2*nz);
385     else val = aa->a;
386 
387     *r = row; *c = col; *v = val;
388   } else {
389     if (bs == 1) *v = aa->a;
390     row = *r; col = *c; val = *v;
391   }
392 
393   nz = 0;
394   if (bs>1) {
395     for (i=0; i<mbs; i++) {
396       rnz = ai[i+1] - ai[i];
397       ajj = aj + ai[i];
398       for (j=0; j<rnz; j++) {
399         for (k=0; k<bs; k++) {
400           for (m=0; m<bs; m++) {
401             if (ajj[j]>i || k>=m) {
402               if (reuse == MAT_INITIAL_MATRIX) {
403                 row[nz] = i*bs + m + shift;
404                 col[nz] = ajj[j]*bs + k + shift;
405               }
406               val[nz++] = aa->a[(ai[i]+j)*bs2 + m + k*bs];
407             }
408           }
409         }
410       }
411     }
412   } else if (reuse == MAT_INITIAL_MATRIX) {
413     for (i=0; i<mbs; i++) {
414       rnz = ai[i+1] - ai[i];
415       ajj = aj + ai[i];
416       for (j=0; j<rnz; j++) {
417         row[nz] = i+shift; col[nz++] = ajj[j] + shift;
418       }
419     }
420     if (nz != aa->nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Different numbers of nonzeros %D != %D",nz,aa->nz);
421   }
422   if (reuse == MAT_INITIAL_MATRIX) *nnz = nz;
423   PetscFunctionReturn(0);
424 }
425 
426 PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
427 {
428   const PetscInt    *ai,*aj,*ajj,*adiag,M=A->rmap->n;
429   PetscInt          nz,rnz,i,j;
430   const PetscScalar *av,*v1;
431   PetscScalar       *val;
432   PetscErrorCode    ierr;
433   PetscInt          *row,*col;
434   Mat_SeqAIJ        *aa=(Mat_SeqAIJ*)A->data;
435   PetscBool         missing;
436 #if defined(PETSC_USE_COMPLEX)
437   PetscBool         hermitian;
438 #endif
439 
440   PetscFunctionBegin;
441 #if defined(PETSC_USE_COMPLEX)
442   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
443   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
444 #endif
445   ierr  = MatSeqAIJGetArrayRead(A,&av);CHKERRQ(ierr);
446   ai    = aa->i; aj = aa->j;
447   adiag = aa->diag;
448   ierr  = MatMissingDiagonal_SeqAIJ(A,&missing,&i);CHKERRQ(ierr);
449   if (reuse == MAT_INITIAL_MATRIX) {
450     /* count nz in the upper triangular part of A */
451     nz = 0;
452     if (missing) {
453       for (i=0; i<M; i++) {
454         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
455           for (j=ai[i];j<ai[i+1];j++) {
456             if (aj[j] < i) continue;
457             nz++;
458           }
459         } else {
460           nz += ai[i+1] - adiag[i];
461         }
462       }
463     } else {
464       for (i=0; i<M; i++) nz += ai[i+1] - adiag[i];
465     }
466     *nnz = nz;
467 
468     ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
469     col  = row + nz;
470     val  = (PetscScalar*)(col + nz);
471 
472     nz = 0;
473     if (missing) {
474       for (i=0; i<M; i++) {
475         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
476           for (j=ai[i];j<ai[i+1];j++) {
477             if (aj[j] < i) continue;
478             row[nz] = i+shift;
479             col[nz] = aj[j]+shift;
480             val[nz] = av[j];
481             nz++;
482           }
483         } else {
484           rnz = ai[i+1] - adiag[i];
485           ajj = aj + adiag[i];
486           v1  = av + adiag[i];
487           for (j=0; j<rnz; j++) {
488             row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
489           }
490         }
491       }
492     } else {
493       for (i=0; i<M; i++) {
494         rnz = ai[i+1] - adiag[i];
495         ajj = aj + adiag[i];
496         v1  = av + adiag[i];
497         for (j=0; j<rnz; j++) {
498           row[nz] = i+shift; col[nz] = ajj[j] + shift; val[nz++] = v1[j];
499         }
500       }
501     }
502     *r = row; *c = col; *v = val;
503   } else {
504     nz = 0; val = *v;
505     if (missing) {
506       for (i=0; i <M; i++) {
507         if (PetscUnlikely(adiag[i] >= ai[i+1])) {
508           for (j=ai[i];j<ai[i+1];j++) {
509             if (aj[j] < i) continue;
510             val[nz++] = av[j];
511           }
512         } else {
513           rnz = ai[i+1] - adiag[i];
514           v1  = av + adiag[i];
515           for (j=0; j<rnz; j++) {
516             val[nz++] = v1[j];
517           }
518         }
519       }
520     } else {
521       for (i=0; i <M; i++) {
522         rnz = ai[i+1] - adiag[i];
523         v1  = av + adiag[i];
524         for (j=0; j<rnz; j++) {
525           val[nz++] = v1[j];
526         }
527       }
528     }
529   }
530   ierr = MatSeqAIJRestoreArrayRead(A,&av);CHKERRQ(ierr);
531   PetscFunctionReturn(0);
532 }
533 
534 PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r,int **c, PetscScalar **v)
535 {
536   const PetscInt    *ai,*aj,*bi,*bj,*garray,*ajj,*bjj;
537   PetscErrorCode    ierr;
538   PetscInt          rstart,nz,bs,i,j,k,m,jj,irow,countA,countB;
539   PetscInt          *row,*col;
540   const PetscScalar *av,*bv,*v1,*v2;
541   PetscScalar       *val;
542   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)A->data;
543   Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ*)(mat->A)->data;
544   Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ*)(mat->B)->data;
545   const PetscInt    bs2=aa->bs2,mbs=aa->mbs;
546 #if defined(PETSC_USE_COMPLEX)
547   PetscBool         hermitian;
548 #endif
549 
550   PetscFunctionBegin;
551 #if defined(PETSC_USE_COMPLEX)
552   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
553   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
554 #endif
555   ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
556   rstart = A->rmap->rstart;
557   ai = aa->i;
558   aj = aa->j;
559   bi = bb->i;
560   bj = bb->j;
561   av = aa->a;
562   bv = bb->a;
563 
564   garray = mat->garray;
565 
566   if (reuse == MAT_INITIAL_MATRIX) {
567     nz   = aa->nz + bb->nz;
568     ierr = PetscMalloc((2*bs2*nz*sizeof(PetscInt)+bs2*nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
569     col  = row + bs2*nz;
570     val  = (PetscScalar*)(col + bs2*nz);
571 
572     *r = row; *c = col; *v = val;
573   } else {
574     row = *r; col = *c; val = *v;
575   }
576 
577   jj = 0; irow = rstart;
578   for (i=0; i<mbs; i++) {
579     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
580     countA = ai[i+1] - ai[i];
581     countB = bi[i+1] - bi[i];
582     bjj    = bj + bi[i];
583     v1     = av + ai[i]*bs2;
584     v2     = bv + bi[i]*bs2;
585 
586     if (bs>1) {
587       /* A-part */
588       for (j=0; j<countA; j++) {
589         for (k=0; k<bs; k++) {
590           for (m=0; m<bs; m++) {
591             if (rstart + ajj[j]*bs>irow || k>=m) {
592               if (reuse == MAT_INITIAL_MATRIX) {
593                 row[jj] = irow + m + shift; col[jj] = rstart + ajj[j]*bs + k + shift;
594               }
595               val[jj++] = v1[j*bs2 + m + k*bs];
596             }
597           }
598         }
599       }
600 
601       /* B-part */
602       for (j=0; j < countB; j++) {
603         for (k=0; k<bs; k++) {
604           for (m=0; m<bs; m++) {
605             if (reuse == MAT_INITIAL_MATRIX) {
606               row[jj] = irow + m + shift; col[jj] = garray[bjj[j]]*bs + k + shift;
607             }
608             val[jj++] = v2[j*bs2 + m + k*bs];
609           }
610         }
611       }
612     } else {
613       /* A-part */
614       for (j=0; j<countA; j++) {
615         if (reuse == MAT_INITIAL_MATRIX) {
616           row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
617         }
618         val[jj++] = v1[j];
619       }
620 
621       /* B-part */
622       for (j=0; j < countB; j++) {
623         if (reuse == MAT_INITIAL_MATRIX) {
624           row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
625         }
626         val[jj++] = v2[j];
627       }
628     }
629     irow+=bs;
630   }
631   *nnz = jj;
632   PetscFunctionReturn(0);
633 }
634 
635 PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
636 {
637   const PetscInt    *ai, *aj, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
638   PetscErrorCode    ierr;
639   PetscInt          rstart,nz,i,j,jj,irow,countA,countB;
640   PetscInt          *row,*col;
641   const PetscScalar *av, *bv,*v1,*v2;
642   PetscScalar       *val;
643   Mat               Ad,Ao;
644   Mat_SeqAIJ        *aa;
645   Mat_SeqAIJ        *bb;
646 
647   PetscFunctionBegin;
648   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
649   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
650   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
651 
652   aa = (Mat_SeqAIJ*)(Ad)->data;
653   bb = (Mat_SeqAIJ*)(Ao)->data;
654   ai = aa->i;
655   aj = aa->j;
656   bi = bb->i;
657   bj = bb->j;
658 
659   rstart = A->rmap->rstart;
660 
661   if (reuse == MAT_INITIAL_MATRIX) {
662     nz   = aa->nz + bb->nz;
663     *nnz = nz;
664     ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
665     col  = row + nz;
666     val  = (PetscScalar*)(col + nz);
667 
668     *r = row; *c = col; *v = val;
669   } else {
670     row = *r; col = *c; val = *v;
671   }
672 
673   jj = 0; irow = rstart;
674   for (i=0; i<m; i++) {
675     ajj    = aj + ai[i];                 /* ptr to the beginning of this row */
676     countA = ai[i+1] - ai[i];
677     countB = bi[i+1] - bi[i];
678     bjj    = bj + bi[i];
679     v1     = av + ai[i];
680     v2     = bv + bi[i];
681 
682     /* A-part */
683     for (j=0; j<countA; j++) {
684       if (reuse == MAT_INITIAL_MATRIX) {
685         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
686       }
687       val[jj++] = v1[j];
688     }
689 
690     /* B-part */
691     for (j=0; j < countB; j++) {
692       if (reuse == MAT_INITIAL_MATRIX) {
693         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
694       }
695       val[jj++] = v2[j];
696     }
697     irow++;
698   }
699   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
700   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
/* Convert an MPIBAIJ matrix to MUMPS triples with global indices, expanding
   each bs x bs block (stored column-major) into scalar entries. The diagonal
   (A) part's block columns are offset by rstart; the off-diagonal (B) part's
   compressed block columns are mapped to global via garray. Indices and values
   share one allocation; on reuse only the values are refreshed. */
PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
{
  Mat_MPIBAIJ       *mat    = (Mat_MPIBAIJ*)A->data;
  Mat_SeqBAIJ       *aa     = (Mat_SeqBAIJ*)(mat->A)->data;
  Mat_SeqBAIJ       *bb     = (Mat_SeqBAIJ*)(mat->B)->data;
  const PetscInt    *ai     = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j,*ajj, *bjj;
  const PetscInt    *garray = mat->garray,mbs=mat->mbs,rstart=A->rmap->rstart;
  const PetscInt    bs2=mat->bs2;
  PetscErrorCode    ierr;
  PetscInt          bs,nz,i,j,k,n,jj,irow,countA,countB,idx;
  PetscInt          *row,*col;
  const PetscScalar *av=aa->a, *bv=bb->a,*v1,*v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  ierr = MatGetBlockSize(A,&bs);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    nz   = bs2*(aa->nz + bb->nz); /* every block contributes bs2 scalar entries */
    *nnz = nz;
    /* one allocation: rows, cols, then values */
    ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
    col  = row + nz;
    val  = (PetscScalar*)(col + nz);

    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }

  jj = 0; irow = rstart; /* irow = global index of the first scalar row of block row i */
  for (i=0; i<mbs; i++) {
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2*ai[i];
    v2     = bv + bs2*bi[i];

    idx = 0;
    /* A-part: idx walks the column-major block storage, matching (j,n) order */
    for (k=0; k<countA; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = rstart + bs*ajj[k] + j + shift;
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part: same expansion, columns translated to global via garray */
    for (k=0; k<countB; k++) {
      for (j=0; j<bs; j++) {
        for (n=0; n<bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            row[jj] = irow + n + shift;
            col[jj] = bs*garray[bjj[k]] + j + shift;
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(0);
}
772 
773 PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A,int shift,MatReuse reuse,int *nnz,int **r, int **c, PetscScalar **v)
774 {
775   const PetscInt    *ai, *aj,*adiag, *bi, *bj,*garray,m=A->rmap->n,*ajj,*bjj;
776   PetscErrorCode    ierr;
777   PetscInt          rstart,nz,nza,nzb,i,j,jj,irow,countA,countB;
778   PetscInt          *row,*col;
779   const PetscScalar *av, *bv,*v1,*v2;
780   PetscScalar       *val;
781   Mat               Ad,Ao;
782   Mat_SeqAIJ        *aa;
783   Mat_SeqAIJ        *bb;
784 #if defined(PETSC_USE_COMPLEX)
785   PetscBool         hermitian;
786 #endif
787 
788   PetscFunctionBegin;
789 #if defined(PETSC_USE_COMPLEX)
790   ierr = MatGetOption(A,MAT_HERMITIAN,&hermitian);CHKERRQ(ierr);
791   if (hermitian) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MUMPS does not support Hermitian symmetric matrices for Choleksy");
792 #endif
793   ierr = MatMPIAIJGetSeqAIJ(A,&Ad,&Ao,&garray);CHKERRQ(ierr);
794   ierr = MatSeqAIJGetArrayRead(Ad,&av);CHKERRQ(ierr);
795   ierr = MatSeqAIJGetArrayRead(Ao,&bv);CHKERRQ(ierr);
796 
797   aa    = (Mat_SeqAIJ*)(Ad)->data;
798   bb    = (Mat_SeqAIJ*)(Ao)->data;
799   ai    = aa->i;
800   aj    = aa->j;
801   adiag = aa->diag;
802   bi    = bb->i;
803   bj    = bb->j;
804 
805   rstart = A->rmap->rstart;
806 
807   if (reuse == MAT_INITIAL_MATRIX) {
808     nza = 0;    /* num of upper triangular entries in mat->A, including diagonals */
809     nzb = 0;    /* num of upper triangular entries in mat->B */
810     for (i=0; i<m; i++) {
811       nza   += (ai[i+1] - adiag[i]);
812       countB = bi[i+1] - bi[i];
813       bjj    = bj + bi[i];
814       for (j=0; j<countB; j++) {
815         if (garray[bjj[j]] > rstart) nzb++;
816       }
817     }
818 
819     nz   = nza + nzb; /* total nz of upper triangular part of mat */
820     *nnz = nz;
821     ierr = PetscMalloc((2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar)), &row);CHKERRQ(ierr);
822     col  = row + nz;
823     val  = (PetscScalar*)(col + nz);
824 
825     *r = row; *c = col; *v = val;
826   } else {
827     row = *r; col = *c; val = *v;
828   }
829 
830   jj = 0; irow = rstart;
831   for (i=0; i<m; i++) {
832     ajj    = aj + adiag[i];                 /* ptr to the beginning of the diagonal of this row */
833     v1     = av + adiag[i];
834     countA = ai[i+1] - adiag[i];
835     countB = bi[i+1] - bi[i];
836     bjj    = bj + bi[i];
837     v2     = bv + bi[i];
838 
839     /* A-part */
840     for (j=0; j<countA; j++) {
841       if (reuse == MAT_INITIAL_MATRIX) {
842         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
843       }
844       val[jj++] = v1[j];
845     }
846 
847     /* B-part */
848     for (j=0; j < countB; j++) {
849       if (garray[bjj[j]] > rstart) {
850         if (reuse == MAT_INITIAL_MATRIX) {
851           row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
852         }
853         val[jj++] = v2[j];
854       }
855     }
856     irow++;
857   }
858   ierr = MatSeqAIJRestoreArrayRead(Ad,&av);CHKERRQ(ierr);
859   ierr = MatSeqAIJRestoreArrayRead(Ao,&bv);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
/* Destroy a MUMPS factored matrix: free all PETSc-side buffers, tell MUMPS to
   release its internal data (JOB_END), tear down the OpenMP controller if one
   was used, and clear the composed query/setter functions. Note the JOB_END
   call must happen before A->data is freed since PetscMUMPS_c reads it. */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_rhs);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->b_seq);CHKERRQ(ierr);
  ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
  ierr = PetscFree(mumps->id.perm_in);CHKERRQ(ierr);
  /* irn/jcn/val (when copied) live in one allocation anchored at irn */
  ierr = PetscFree(mumps->irn);CHKERRQ(ierr);
  ierr = PetscFree(mumps->info);CHKERRQ(ierr);
  ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
  /* let MUMPS free its own internal factor data */
  mumps->id.job = JOB_END;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in MatDestroy_MUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) { ierr = PetscOmpCtrlDestroy(&mumps->omp_ctrl);CHKERRQ(ierr); }
#endif
  ierr = PetscFree2(mumps->recvcount,mumps->displs);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);

  /* clear composed functions */
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorSetSchurIS_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorCreateSchurComplement_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetIcntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsSetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetCntl_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfo_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetRinfog_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverse_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)A,"MatMumpsGetInverseTranspose_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
903 
/* Solve A x = b using the MUMPS factors held in A (JOB_SOLVE).  When
   ICNTL(9)==0 the transposed system is solved instead (see
   MatSolveTranspose_MUMPS()).  MUMPS requires the rhs centralized on the
   host rank; in parallel it returns the solution distributed in
   sol_loc/isol_loc, which is scattered back into the PETSc vector x. */
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x)
{
  Mat_MUMPS        *mumps=(Mat_MUMPS*)A->data;
  PetscScalar      *array;
  Vec              b_seq;
  IS               is_iden,is_petsc;
  PetscErrorCode   ierr;
  PetscInt         i;
  PetscBool        second_solve = PETSC_FALSE;
  static PetscBool cite1 = PETSC_FALSE,cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  ierr = PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",&cite1);CHKERRQ(ierr);
  ierr = PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",&cite2);CHKERRQ(ierr);

  if (A->factorerrortype) {
    /* factorization failed earlier; mark the "solution" invalid instead of solving */
    ierr = PetscInfo2(A,"MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    ierr = VecSetInf(x);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  mumps->id.ICNTL(20) = 0; /* dense RHS */
  mumps->id.nrhs      = 1;
  b_seq               = mumps->b_seq;
  if (mumps->petsc_size > 1) {
    /* MUMPS only supports centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_rhs,b,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    if (!mumps->myid) {ierr = VecGetArray(b_seq,&array);CHKERRQ(ierr);}
  } else {  /* petsc_size == 1 */
    /* sequential: MUMPS overwrites the rhs with the solution, so copy b into x
       and hand x's array to MUMPS */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!mumps->myid) { /* define rhs on the host */
    mumps->id.nrhs = 1;
    mumps->id.rhs = (MumpsScalar*)array;
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
    if (mumps->petsc_size > 1) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");
    second_solve = PETSC_TRUE;
    ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
  }
  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) {
    ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of lsol_loc may change (not its size, lsol_loc), recreates scat_sol */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,mumps->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<mumps->id.lsol_loc; i++) {
        mumps->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,mumps->id.lsol_loc,mumps->id.isol_loc,PETSC_COPY_VALUES,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(mumps->x_seq,is_iden,x,is_petsc,&mumps->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(&is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(&is_petsc);CHKERRQ(ierr);

      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    ierr = VecScatterBegin(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mumps->scat_sol,mumps->x_seq,x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  }

  if (mumps->petsc_size > 1) {if (!mumps->myid) {ierr = VecRestoreArray(b_seq,&array);CHKERRQ(ierr);}}
  else {ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);}

  ierr = PetscLogFlops(2.0*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
993 
994 PetscErrorCode MatSolveTranspose_MUMPS(Mat A,Vec b,Vec x)
995 {
996   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->data;
997   PetscErrorCode ierr;
998 
999   PetscFunctionBegin;
1000   mumps->id.ICNTL(9) = 0;
1001   ierr = MatSolve_MUMPS(A,b,x);CHKERRQ(ierr);
1002   mumps->id.ICNTL(9) = 1;
1003   PetscFunctionReturn(0);
1004 }
1005 
/* Solve A X = B for multiple right-hand sides with the MUMPS factors in A.
   B is either dense (MATSEQDENSE/MATMPIDENSE) or, for sparse right-hand
   sides, a MATTRANSPOSEMAT wrapper of an AIJ matrix (MUMPS wants sparse rhs
   in compressed-COLUMN form, which the wrapped matrix's compressed-row
   storage provides -- see MatMatTransposeSolve_MUMPS()); X must be dense.
   In parallel, B is centralized on the host rank as MUMPS requires, and the
   distributed MUMPS solution is scattered back into X. */
PetscErrorCode MatMatSolve_MUMPS(Mat A,Mat B,Mat X)
{
  PetscErrorCode    ierr;
  Mat               Bt = NULL; /* underlying AIJ matrix when B is a transpose wrapper */
  PetscBool         flg, flgT;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
  PetscInt          i,nrhs,M;
  PetscScalar       *array;
  const PetscScalar *rbray;
  PetscInt          lsol_loc,nlsol_loc,*isol_loc,*idxx,*isol_loc_save,iidx = 0;
  PetscScalar       *bray,*sol_loc,*sol_loc_save;
  IS                is_to,is_from;
  PetscInt          k,proc,j,m,myrstart;
  const PetscInt    *rstart;
  Vec               v_mpi,b_seq,msol_loc;
  VecScatter        scat_rhs,scat_sol;
  PetscScalar       *aa; /* NOTE(review): aa/spnr/ia/ja are shadowed by identically named locals in the petsc_size==1 branch below */
  PetscInt          spnr,*ia,*ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)X),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");

  ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
  if (flg) { /* dense B */
    if (B->rmap->n != X->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20)= 0; /* dense RHS */
  } else { /* sparse B */
    if (X == B) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_IDN,"X and B must be different matrices");
    ierr = PetscObjectTypeCompare((PetscObject)B,MATTRANSPOSEMAT,&flgT);CHKERRQ(ierr);
    if (flgT) { /* input B is transpose of actual RHS matrix,
                 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
      ierr = MatTransposeGetMat(B,&Bt);CHKERRQ(ierr);
    } else SETERRQ(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_WRONG,"Matrix B must be MATTRANSPOSEMAT matrix");
    mumps->id.ICNTL(20)= 1; /* sparse RHS */
  }

  ierr = MatGetSize(B,&M,&nrhs);CHKERRQ(ierr);
  mumps->id.nrhs = nrhs;
  mumps->id.lrhs = M; /* leading dimension of the rhs array */
  mumps->id.rhs  = NULL;

  if (mumps->petsc_size == 1) {
    PetscScalar *aa;
    PetscInt    spnr,*ia,*ja;
    PetscBool   second_solve = PETSC_FALSE;

    ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
    mumps->id.rhs = (MumpsScalar*)array; /* MUMPS overwrites the rhs with the solution */

    if (!Bt) { /* dense B */
      /* copy B to X so MUMPS can solve in place in X's array */
      ierr = MatDenseGetArrayRead(B,&rbray);CHKERRQ(ierr);
      ierr = PetscArraycpy(array,rbray,M*nrhs);CHKERRQ(ierr);
      ierr = MatDenseRestoreArrayRead(B,&rbray);CHKERRQ(ierr);
    } else { /* sparse B */
      ierr = MatSeqAIJGetArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* mumps requires ia and ja start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    }
    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0 && (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2)) {
      second_solve = PETSC_TRUE;
      ierr = MatMumpsHandleSchur_Private(A,PETSC_FALSE);CHKERRQ(ierr);
    }
    /* solve phase */
    /*-------------*/
    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) {
      ierr = MatMumpsHandleSchur_Private(A,PETSC_TRUE);CHKERRQ(ierr);
    }
    if (Bt) { /* sparse B */
      ierr = MatSeqAIJRestoreArray(Bt,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(Bt,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
    ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /*--------- parallel case: MUMPS requires rhs B to be centralized on the host! --------*/
  if (mumps->petsc_size > 1 && mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Parallel Schur complements not yet supported from PETSc\n");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save = mumps->id.isol_loc; /* save it for MatSolve() */
  sol_loc_save  = (PetscScalar*)mumps->id.sol_loc;

  lsol_loc  = mumps->id.lsol_loc;
  nlsol_loc = nrhs*lsol_loc;     /* length of sol_loc */
  ierr = PetscMalloc2(nlsol_loc,&sol_loc,lsol_loc,&isol_loc);CHKERRQ(ierr);
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc;
  mumps->id.isol_loc = isol_loc;

  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nlsol_loc,(PetscScalar*)sol_loc,&msol_loc);CHKERRQ(ierr);

  if (!Bt) { /* dense B */
    /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
       very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
       0, re-arrange B into desired order, which is a local operation.
     */

    /* scatter v_mpi to b_seq because MUMPS only supports centralized rhs */
    /* wrap dense rhs matrix B into a vector v_mpi */
    ierr = MatGetLocalSize(B,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(B,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)B),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(B,&bray);CHKERRQ(ierr);

    /* scatter v_mpi to b_seq in proc[0]. MUMPS requires rhs to be centralized on the host! */
    if (!mumps->myid) {
      PetscInt *idx;
      /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
      ierr = PetscMalloc1(nrhs*M,&idx);CHKERRQ(ierr);
      ierr = MatGetOwnershipRanges(B,&rstart);CHKERRQ(ierr);
      k = 0;
      for (proc=0; proc<mumps->petsc_size; proc++){
        for (j=0; j<nrhs; j++){
          for (i=rstart[proc]; i<rstart[proc+1]; i++) idx[k++] = j*M + i;
        }
      }

      ierr = VecCreateSeq(PETSC_COMM_SELF,nrhs*M,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateGeneral(PETSC_COMM_SELF,nrhs*M,idx,PETSC_OWN_POINTER,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,nrhs*M,0,1,&is_from);CHKERRQ(ierr);
    } else {
      /* non-host ranks contribute empty pieces to the scatter */
      ierr = VecCreateSeq(PETSC_COMM_SELF,0,&b_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_to);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_from);CHKERRQ(ierr);
    }
    ierr = VecScatterCreate(v_mpi,is_from,b_seq,is_to,&scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterBegin(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = ISDestroy(&is_to);CHKERRQ(ierr);
    ierr = ISDestroy(&is_from);CHKERRQ(ierr);
    ierr = VecScatterEnd(scat_rhs,v_mpi,b_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

    if (!mumps->myid) { /* define rhs on the host */
      ierr = VecGetArray(b_seq,&bray);CHKERRQ(ierr);
      mumps->id.rhs = (MumpsScalar*)bray;
      ierr = VecRestoreArray(b_seq,&bray);CHKERRQ(ierr);
    }

  } else { /* sparse B */
    b = (Mat_MPIAIJ*)Bt->data;

    /* wrap dense X into a vector v_mpi */
    ierr = MatGetLocalSize(X,&m,NULL);CHKERRQ(ierr);
    ierr = MatDenseGetArray(X,&bray);CHKERRQ(ierr);
    ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)X),1,nrhs*m,nrhs*M,(const PetscScalar*)bray,&v_mpi);CHKERRQ(ierr);
    ierr = MatDenseRestoreArray(X,&bray);CHKERRQ(ierr);

    if (!mumps->myid) {
      /* host supplies the sparse rhs to MUMPS from Bt's local part b->A */
      ierr = MatSeqAIJGetArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatGetRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
      /* mumps requires ia and ja start at 1! */
      mumps->id.irhs_ptr    = ia;
      mumps->id.irhs_sparse = ja;
      mumps->id.nz_rhs      = ia[spnr] - 1;
      mumps->id.rhs_sparse  = (MumpsScalar*)aa;
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      mumps->id.rhs_sparse  = NULL;
    }
  }

  /* solve phase */
  /*-------------*/
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* scatter mumps distributed solution to petsc vector v_mpi, which shares local arrays with solution matrix X */
  ierr = MatDenseGetArray(X,&array);CHKERRQ(ierr);
  ierr = VecPlaceArray(v_mpi,array);CHKERRQ(ierr);

  /* create scatter scat_sol */
  ierr = MatGetOwnershipRanges(X,&rstart);CHKERRQ(ierr);
  /* iidx: index for scatter mumps solution to petsc X */

  ierr = ISCreateStride(PETSC_COMM_SELF,nlsol_loc,0,1,&is_from);CHKERRQ(ierr);
  ierr = PetscMalloc1(nlsol_loc,&idxx);CHKERRQ(ierr);
  for (i=0; i<lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    /* locate the rank owning global row isol_loc[i] of X */
    for (proc=0; proc<mumps->petsc_size; proc++){
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc+1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;        /* local index on 1st column of petsc vector X */
        iidx     = k + myrstart*nrhs;             /* maps mumps isol_loc[i] to petsc index in X */
        m        = rstart[proc+1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j=0; j<nrhs; j++) idxx[i+j*lsol_loc] = iidx + j*m;
  }
  ierr = ISCreateGeneral(PETSC_COMM_SELF,nlsol_loc,idxx,PETSC_COPY_VALUES,&is_to);CHKERRQ(ierr);
  ierr = VecScatterCreate(msol_loc,is_from,v_mpi,is_to,&scat_sol);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = ISDestroy(&is_from);CHKERRQ(ierr);
  ierr = ISDestroy(&is_to);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat_sol,msol_loc,v_mpi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = MatDenseRestoreArray(X,&array);CHKERRQ(ierr);

  /* free spaces */
  mumps->id.sol_loc  = (MumpsScalar*)sol_loc_save; /* restore MatSolve()'s buffers saved above */
  mumps->id.isol_loc = isol_loc_save;

  ierr = PetscFree2(sol_loc,isol_loc);CHKERRQ(ierr);
  ierr = PetscFree(idxx);CHKERRQ(ierr);
  ierr = VecDestroy(&msol_loc);CHKERRQ(ierr);
  ierr = VecDestroy(&v_mpi);CHKERRQ(ierr);
  if (Bt) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ*)Bt->data;
      ierr = MatSeqAIJRestoreArray(b->A,&aa);CHKERRQ(ierr);
      ierr = MatRestoreRowIJ(b->A,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
      if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot restore IJ structure");
    }
  } else {
    ierr = VecDestroy(&b_seq);CHKERRQ(ierr);
    ierr = VecScatterDestroy(&scat_rhs);CHKERRQ(ierr);
  }
  ierr = VecScatterDestroy(&scat_sol);CHKERRQ(ierr);
  ierr = PetscLogFlops(2.0*nrhs*mumps->id.RINFO(3));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1245 
1246 PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A,Mat Bt,Mat X)
1247 {
1248   PetscErrorCode ierr;
1249   PetscBool      flg;
1250   Mat            B;
1251 
1252   PetscFunctionBegin;
1253   ierr = PetscObjectTypeCompareAny((PetscObject)Bt,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
1254   if (!flg) SETERRQ(PetscObjectComm((PetscObject)Bt),PETSC_ERR_ARG_WRONG,"Matrix Bt must be MATAIJ matrix");
1255 
1256   /* Create B=Bt^T that uses Bt's data structure */
1257   ierr = MatCreateTranspose(Bt,&B);CHKERRQ(ierr);
1258 
1259   ierr = MatMatSolve_MUMPS(A,B,X);CHKERRQ(ierr);
1260   ierr = MatDestroy(&B);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #if !defined(PETSC_USE_COMPLEX)
1265 /*
1266   input:
1267    F:        numeric factor
1268   output:
1269    nneg:     total number of negative pivots
1270    nzero:    total number of zero pivots
1271    npos:     (global dimension of F) - nneg - nzero
1272 */
1273 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
1274 {
1275   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
1276   PetscErrorCode ierr;
1277   PetscMPIInt    size;
1278 
1279   PetscFunctionBegin;
1280   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)F),&size);CHKERRQ(ierr);
1281   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
1282   if (size > 1 && mumps->id.ICNTL(13) != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",mumps->id.INFOG(13));
1283 
1284   if (nneg) *nneg = mumps->id.INFOG(12);
1285   if (nzero || npos) {
1286     if (mumps->id.ICNTL(24) != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
1287     if (nzero) *nzero = mumps->id.INFOG(28);
1288     if (npos) *npos   = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
1289   }
1290   PetscFunctionReturn(0);
1291 }
1292 #endif
1293 
1294 PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse,Mat_MUMPS *mumps)
1295 {
1296   PetscErrorCode ierr;
1297   PetscInt       i,nz=0,*irn,*jcn=0;
1298   PetscScalar    *val=0;
1299   PetscMPIInt    mpinz,*recvcount=NULL,*displs=NULL;
1300 
1301   PetscFunctionBegin;
1302   if (mumps->omp_comm_size > 1) {
1303     if (reuse == MAT_INITIAL_MATRIX) {
1304       /* master first gathers counts of nonzeros to receive */
1305       if (mumps->is_omp_master) { ierr = PetscMalloc2(mumps->omp_comm_size,&recvcount,mumps->omp_comm_size,&displs);CHKERRQ(ierr); }
1306       ierr = PetscMPIIntCast(mumps->nz,&mpinz);CHKERRQ(ierr);
1307       ierr = MPI_Gather(&mpinz,1,MPI_INT,recvcount,1,MPI_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
1308 
1309       /* master allocates memory to receive nonzeros */
1310       if (mumps->is_omp_master) {
1311         displs[0] = 0;
1312         for (i=1; i<mumps->omp_comm_size; i++) displs[i] = displs[i-1] + recvcount[i-1];
1313         nz   = displs[mumps->omp_comm_size-1] + recvcount[mumps->omp_comm_size-1];
1314         ierr = PetscMalloc(2*nz*sizeof(PetscInt)+nz*sizeof(PetscScalar),&irn);CHKERRQ(ierr);
1315         jcn  = irn + nz;
1316         val  = (PetscScalar*)(jcn + nz);
1317       }
1318 
1319       /* save the gatherv plan */
1320       mumps->mpinz     = mpinz; /* used as send count */
1321       mumps->recvcount = recvcount;
1322       mumps->displs    = displs;
1323 
1324       /* master gathers nonzeros */
1325       ierr = MPI_Gatherv(mumps->irn,mpinz,MPIU_INT,irn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
1326       ierr = MPI_Gatherv(mumps->jcn,mpinz,MPIU_INT,jcn,mumps->recvcount,mumps->displs,MPIU_INT,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
1327       ierr = MPI_Gatherv(mumps->val,mpinz,MPIU_SCALAR,val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
1328 
1329       /* master frees its row/col/val and replaces them with bigger arrays */
1330       if (mumps->is_omp_master) {
1331         ierr = PetscFree(mumps->irn);CHKERRQ(ierr); /* irn/jcn/val are allocated together so free only irn */
1332         mumps->nz  = nz; /* it is a sum of mpinz over omp_comm */
1333         mumps->irn = irn;
1334         mumps->jcn = jcn;
1335         mumps->val = val;
1336       }
1337     } else {
1338       ierr = MPI_Gatherv((mumps->is_omp_master?MPI_IN_PLACE:mumps->val),mumps->mpinz,MPIU_SCALAR,mumps->val,mumps->recvcount,mumps->displs,MPIU_SCALAR,0/*root*/,mumps->omp_comm);CHKERRQ(ierr);
1339     }
1340   }
1341   PetscFunctionReturn(0);
1342 }
1343 
/* Numerical factorization phase (MUMPS JOB_FACTNUMERIC): re-extract A's
   current values into the MUMPS triples (the pattern from the symbolic
   phase is reused), gather them onto OpenMP master ranks if needed, and
   run the MUMPS numeric factorization.  MUMPS failures are either raised
   as errors (when A->erroriffailure) or recorded in F->factorerrortype.
   In parallel, the distributed-solution buffers (sol_loc/isol_loc/x_seq)
   are (re)allocated here for use by the solve routines. */
PetscErrorCode MatFactorNumeric_MUMPS(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps =(Mat_MUMPS*)(F)->data;
  PetscErrorCode ierr;
  PetscBool      isMPIAIJ;

  PetscFunctionBegin;
  /* NOTE(review): (INFOG(1) == -16 && INFOG(1) == 0) can never be true, so this
     guard reduces to INFOG(1) < 0; one of the two INFOG(1) references is
     presumably meant to be a different field (e.g. INFO(2)) -- confirm against
     the MUMPS error-code documentation before changing */
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) {
    if (mumps->id.INFOG(1) == -6) {
      ierr = PetscInfo2(A,"MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    }
    ierr = PetscInfo2(A,"MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  /* refresh the triples with A's current numerical values (pattern reused) */
  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX,mumps);CHKERRQ(ierr);

  /* numerical factorization phase */
  /*-------------------------------*/
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) {
      mumps->id.a = (MumpsScalar*)mumps->val;
    }
  } else {
    mumps->id.a_loc = (MumpsScalar*)mumps->val;
  }
  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    if (A->erroriffailure) {
      SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
    } else {
      /* translate the MUMPS error code into a PETSc factor-error type */
      if (mumps->id.INFOG(1) == -10) { /* numerically singular matrix */
        ierr = PetscInfo2(F,"matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      } else if (mumps->id.INFOG(1) == -13) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, cannot allocate required memory %d megabytes\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10) ) {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d, problem with workarray \n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OUTMEMORY;
      } else {
        ierr = PetscInfo2(F,"MUMPS in numerical factorization phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
        F->factorerrortype = MAT_FACTOR_OTHER;
      }
    }
  }
  /* NOTE(review): the condition tests ICNTL(16) but the message prints
     INFOG(16) -- verify which field is intended */
  if (!mumps->myid && mumps->id.ICNTL(16) > 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"  mumps->id.ICNTL(16):=%d\n",mumps->id.INFOG(16));

  F->assembled    = PETSC_TRUE;
  mumps->matstruc = SAME_NONZERO_PATTERN;
  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      ierr = MatTranspose(F->schur,MAT_INPLACE_MATRIX,&F->schur);CHKERRQ(ierr);
    }
    ierr = MatFactorRestoreSchurComplement(F,NULL,MAT_FACTOR_SCHUR_UNFACTORED);CHKERRQ(ierr);
  }

  /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  if (mumps->petsc_size > 1) {
    PetscInt    lsol_loc;
    PetscScalar *sol_loc;

    ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIAIJ,&isMPIAIJ);CHKERRQ(ierr);

    /* distributed solution; Create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      /* refactorization: drop the buffers from the previous factorization */
      ierr = VecScatterDestroy(&mumps->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree2(mumps->id.sol_loc,mumps->id.isol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(&mumps->x_seq);CHKERRQ(ierr);
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    ierr = PetscMalloc2(lsol_loc,&sol_loc,lsol_loc,&mumps->id.isol_loc);CHKERRQ(ierr);
    mumps->id.lsol_loc = lsol_loc;
    mumps->id.sol_loc = (MumpsScalar*)sol_loc;
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lsol_loc,sol_loc,&mumps->x_seq);CHKERRQ(ierr);
  }
  ierr = PetscLogFlops(mumps->id.RINFO(2));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1432 
1433 /* Sets MUMPS options from the options database */
1434 PetscErrorCode PetscSetMUMPSFromOptions(Mat F, Mat A)
1435 {
1436   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1437   PetscErrorCode ierr;
1438   PetscInt       icntl,info[80],i,ninfo=80;
1439   PetscBool      flg;
1440 
1441   PetscFunctionBegin;
1442   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
1443   ierr = PetscOptionsInt("-mat_mumps_icntl_1","ICNTL(1): output stream for error messages","None",mumps->id.ICNTL(1),&icntl,&flg);CHKERRQ(ierr);
1444   if (flg) mumps->id.ICNTL(1) = icntl;
1445   ierr = PetscOptionsInt("-mat_mumps_icntl_2","ICNTL(2): output stream for diagnostic printing, statistics, and warning","None",mumps->id.ICNTL(2),&icntl,&flg);CHKERRQ(ierr);
1446   if (flg) mumps->id.ICNTL(2) = icntl;
1447   ierr = PetscOptionsInt("-mat_mumps_icntl_3","ICNTL(3): output stream for global information, collected on the host","None",mumps->id.ICNTL(3),&icntl,&flg);CHKERRQ(ierr);
1448   if (flg) mumps->id.ICNTL(3) = icntl;
1449 
1450   ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",mumps->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
1451   if (flg) mumps->id.ICNTL(4) = icntl;
1452   if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */
1453 
1454   ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)","None",mumps->id.ICNTL(6),&icntl,&flg);CHKERRQ(ierr);
1455   if (flg) mumps->id.ICNTL(6) = icntl;
1456 
1457   ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis","None",mumps->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
1458   if (flg) {
1459     if (icntl== 1 && mumps->petsc_size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
1460     else mumps->id.ICNTL(7) = icntl;
1461   }
1462 
1463   ierr = PetscOptionsInt("-mat_mumps_icntl_8","ICNTL(8): scaling strategy (-2 to 8 or 77)","None",mumps->id.ICNTL(8),&mumps->id.ICNTL(8),NULL);CHKERRQ(ierr);
1464   /* ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL);CHKERRQ(ierr); handled by MatSolveTranspose_MUMPS() */
1465   ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",mumps->id.ICNTL(10),&mumps->id.ICNTL(10),NULL);CHKERRQ(ierr);
1466   ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): statistics related to an error analysis (via -ksp_view)","None",mumps->id.ICNTL(11),&mumps->id.ICNTL(11),NULL);CHKERRQ(ierr);
1467   ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)","None",mumps->id.ICNTL(12),&mumps->id.ICNTL(12),NULL);CHKERRQ(ierr);
1468   ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting","None",mumps->id.ICNTL(13),&mumps->id.ICNTL(13),NULL);CHKERRQ(ierr);
1469   ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage increase in the estimated working space","None",mumps->id.ICNTL(14),&mumps->id.ICNTL(14),NULL);CHKERRQ(ierr);
1470   ierr = PetscOptionsInt("-mat_mumps_icntl_19","ICNTL(19): computes the Schur complement","None",mumps->id.ICNTL(19),&mumps->id.ICNTL(19),NULL);CHKERRQ(ierr);
1471   if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */
1472     ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
1473     ierr = MatMumpsResetSchur_Private(mumps);CHKERRQ(ierr);
1474   }
1475   /* ierr = PetscOptionsInt("-mat_mumps_icntl_20","ICNTL(20): the format (dense or sparse) of the right-hand sides","None",mumps->id.ICNTL(20),&mumps->id.ICNTL(20),NULL);CHKERRQ(ierr); -- sparse rhs is not supported in PETSc API */
1476   /* ierr = PetscOptionsInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL);CHKERRQ(ierr); we only use distributed solution vector */
1477 
1478   ierr = PetscOptionsInt("-mat_mumps_icntl_22","ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)","None",mumps->id.ICNTL(22),&mumps->id.ICNTL(22),NULL);CHKERRQ(ierr);
1479   ierr = PetscOptionsInt("-mat_mumps_icntl_23","ICNTL(23): max size of the working memory (MB) that can allocate per processor","None",mumps->id.ICNTL(23),&mumps->id.ICNTL(23),NULL);CHKERRQ(ierr);
1480   ierr = PetscOptionsInt("-mat_mumps_icntl_24","ICNTL(24): detection of null pivot rows (0 or 1)","None",mumps->id.ICNTL(24),&mumps->id.ICNTL(24),NULL);CHKERRQ(ierr);
1481   if (mumps->id.ICNTL(24)) {
1482     mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */
1483   }
1484 
1485   ierr = PetscOptionsInt("-mat_mumps_icntl_25","ICNTL(25): computes a solution of a deficient matrix and a null space basis","None",mumps->id.ICNTL(25),&mumps->id.ICNTL(25),NULL);CHKERRQ(ierr);
1486   ierr = PetscOptionsInt("-mat_mumps_icntl_26","ICNTL(26): drives the solution phase if a Schur complement matrix","None",mumps->id.ICNTL(26),&mumps->id.ICNTL(26),NULL);CHKERRQ(ierr);
1487   ierr = PetscOptionsInt("-mat_mumps_icntl_27","ICNTL(27): the blocking size for multiple right-hand sides","None",mumps->id.ICNTL(27),&mumps->id.ICNTL(27),NULL);CHKERRQ(ierr);
1488   ierr = PetscOptionsInt("-mat_mumps_icntl_28","ICNTL(28): use 1 for sequential analysis and ictnl(7) ordering, or 2 for parallel analysis and ictnl(29) ordering","None",mumps->id.ICNTL(28),&mumps->id.ICNTL(28),NULL);CHKERRQ(ierr);
1489   ierr = PetscOptionsInt("-mat_mumps_icntl_29","ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis","None",mumps->id.ICNTL(29),&mumps->id.ICNTL(29),NULL);CHKERRQ(ierr);
1490   /* ierr = PetscOptionsInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL);CHKERRQ(ierr); */ /* call MatMumpsGetInverse() directly */
1491   ierr = PetscOptionsInt("-mat_mumps_icntl_31","ICNTL(31): indicates which factors may be discarded during factorization","None",mumps->id.ICNTL(31),&mumps->id.ICNTL(31),NULL);CHKERRQ(ierr);
1492   /* ierr = PetscOptionsInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elemination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL);CHKERRQ(ierr);  -- not supported by PETSc API */
1493   ierr = PetscOptionsInt("-mat_mumps_icntl_33","ICNTL(33): compute determinant","None",mumps->id.ICNTL(33),&mumps->id.ICNTL(33),NULL);CHKERRQ(ierr);
1494   ierr = PetscOptionsInt("-mat_mumps_icntl_35","ICNTL(35): activates Block Low Rank (BLR) based factorization","None",mumps->id.ICNTL(35),&mumps->id.ICNTL(35),NULL);CHKERRQ(ierr);
1495   ierr = PetscOptionsInt("-mat_mumps_icntl_36","ICNTL(36): choice of BLR factorization variant","None",mumps->id.ICNTL(36),&mumps->id.ICNTL(36),NULL);CHKERRQ(ierr);
1496   ierr = PetscOptionsInt("-mat_mumps_icntl_38","ICNTL(38): estimated compression rate of LU factors with BLR","None",mumps->id.ICNTL(38),&mumps->id.ICNTL(38),NULL);CHKERRQ(ierr);
1497 
1498   ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",mumps->id.CNTL(1),&mumps->id.CNTL(1),NULL);CHKERRQ(ierr);
1499   ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",mumps->id.CNTL(2),&mumps->id.CNTL(2),NULL);CHKERRQ(ierr);
1500   ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",mumps->id.CNTL(3),&mumps->id.CNTL(3),NULL);CHKERRQ(ierr);
1501   ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",mumps->id.CNTL(4),&mumps->id.CNTL(4),NULL);CHKERRQ(ierr);
1502   ierr = PetscOptionsReal("-mat_mumps_cntl_5","CNTL(5): fixation for null pivots","None",mumps->id.CNTL(5),&mumps->id.CNTL(5),NULL);CHKERRQ(ierr);
1503   ierr = PetscOptionsReal("-mat_mumps_cntl_7","CNTL(7): dropping parameter used during BLR","None",mumps->id.CNTL(7),&mumps->id.CNTL(7),NULL);CHKERRQ(ierr);
1504 
1505   ierr = PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, 256, NULL);CHKERRQ(ierr);
1506 
1507   ierr = PetscOptionsIntArray("-mat_mumps_view_info","request INFO local to each processor","",info,&ninfo,NULL);CHKERRQ(ierr);
1508   if (ninfo) {
1509     if (ninfo > 80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"number of INFO %d must <= 80\n",ninfo);
1510     ierr = PetscMalloc1(ninfo,&mumps->info);CHKERRQ(ierr);
1511     mumps->ninfo = ninfo;
1512     for (i=0; i<ninfo; i++) {
1513       if (info[i] < 0 || info[i]>80) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"index of INFO %d must between 1 and 80\n",ninfo);
1514       else  mumps->info[i] = info[i];
1515     }
1516   }
1517 
1518   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1519   PetscFunctionReturn(0);
1520 }
1521 
/*
   Initializes the MUMPS instance attached to factor matrix A:
   - records the PETSc communicator, its size and this process's rank;
   - optionally sets up the PETSc OpenMP support (master ranks call MUMPS on behalf of slaves);
   - calls MUMPS with JOB_INIT to create the internal MUMPS instance;
   - broadcasts MUMPS default control parameters to slave ranks and applies PETSc's
     default overrides (no output, distributed input/solution when parallel).
   Collective on the communicator of A.
*/
PetscErrorCode PetscInitializeMUMPS(Mat A,Mat_MUMPS *mumps)
{
  PetscErrorCode ierr;
  PetscInt       nthreads=0;

  PetscFunctionBegin;
  mumps->petsc_comm = PetscObjectComm((PetscObject)A);
  ierr = MPI_Comm_size(mumps->petsc_comm,&mumps->petsc_size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(mumps->petsc_comm,&mumps->myid);CHKERRQ(ierr); /* so that code like "if (!myid)" still works even if mumps_comm is different */

  /* -mat_mumps_use_omp_threads may appear with or without a value; HasName detects
     presence, GetInt then overwrites nthreads only when a value was actually given */
  ierr = PetscOptionsHasName(NULL,NULL,"-mat_mumps_use_omp_threads",&mumps->use_petsc_omp_support);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */
  ierr = PetscOptionsGetInt(NULL,NULL,"-mat_mumps_use_omp_threads",&nthreads,NULL);CHKERRQ(ierr);
  if (mumps->use_petsc_omp_support) {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    ierr = PetscOmpCtrlCreate(mumps->petsc_comm,nthreads,&mumps->omp_ctrl);CHKERRQ(ierr);
    ierr = PetscOmpCtrlGetOmpComms(mumps->omp_ctrl,&mumps->omp_comm,&mumps->mumps_comm,&mumps->is_omp_master);CHKERRQ(ierr);
#else
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP_SYS,"the system does not have PETSc OpenMP support but you added the -mat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual\n");
#endif
  } else {
    /* no OpenMP support requested: every rank is its own "master" and MUMPS runs on the full PETSc communicator */
    mumps->omp_comm      = PETSC_COMM_SELF;
    mumps->mumps_comm    = mumps->petsc_comm;
    mumps->is_omp_master = PETSC_TRUE;
  }
  ierr = MPI_Comm_size(mumps->omp_comm,&mumps->omp_comm_size);CHKERRQ(ierr);

  /* MUMPS expects a Fortran communicator handle */
  mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm);
  mumps->id.job = JOB_INIT;
  mumps->id.par = 1;  /* host participates factorizaton and solve */
  mumps->id.sym = mumps->sym;

  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in PetscInitializeMUMPS: INFOG(1)=%d\n",mumps->id.INFOG(1));

  /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code.
     For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS.
   */
  ierr = MPI_Bcast(mumps->id.icntl,40,MPI_INT,  0,mumps->omp_comm);CHKERRQ(ierr); /* see MUMPS-5.1.2 Manual Section 9 */
  ierr = MPI_Bcast(mumps->id.cntl, 15,MPIU_REAL,0,mumps->omp_comm);CHKERRQ(ierr);

  mumps->scat_rhs     = NULL;
  mumps->scat_sol     = NULL;

  /* set PETSc-MUMPS default options - override MUMPS default */
  mumps->id.ICNTL(3) = 0;  /* suppress global info output */
  mumps->id.ICNTL(4) = 0;  /* suppress all printed messages */
  if (mumps->petsc_size == 1) {
    mumps->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
  } else {
    mumps->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
    mumps->id.ICNTL(20) = 0;   /* rhs is in dense format */
    mumps->id.ICNTL(21) = 1;   /* distributed solution */
  }

  /* schur */
  mumps->id.size_schur      = 0;
  mumps->id.listvar_schur   = NULL;
  mumps->id.schur           = NULL;
  mumps->sizeredrhs         = 0;
  mumps->schur_sol          = NULL;
  mumps->schur_sizesol      = 0;
  PetscFunctionReturn(0);
}
1586 
1587 PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F,Mat A,const MatFactorInfo *info,Mat_MUMPS *mumps)
1588 {
1589   PetscErrorCode ierr;
1590 
1591   PetscFunctionBegin;
1592   if (mumps->id.INFOG(1) < 0) {
1593     if (A->erroriffailure) {
1594       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",mumps->id.INFOG(1));
1595     } else {
1596       if (mumps->id.INFOG(1) == -6) {
1597         ierr = PetscInfo2(F,"matrix is singular in structure, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1598         F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT;
1599       } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) {
1600         ierr = PetscInfo2(F,"problem of workspace, INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1601         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
1602       } else if (mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0) {
1603         ierr = PetscInfo(F,"Empty matrix\n");CHKERRQ(ierr);
1604       } else {
1605         ierr = PetscInfo2(F,"Error reported by MUMPS in analysis phase: INFOG(1)=%d, INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));CHKERRQ(ierr);
1606         F->factorerrortype = MAT_FACTOR_OTHER;
1607       }
1608     }
1609   }
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 /* Note Petsc r(=c) permutation is used when mumps->id.ICNTL(7)==1 with centralized assembled matrix input; otherwise r and c are ignored */
1614 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1615 {
1616   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1617   PetscErrorCode ierr;
1618   Vec            b;
1619   const PetscInt M = A->rmap->N;
1620 
1621   PetscFunctionBegin;
1622   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
1623 
1624   /* Set MUMPS options from the options database */
1625   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
1626 
1627   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
1628   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
1629 
1630   /* analysis phase */
1631   /*----------------*/
1632   mumps->id.job = JOB_FACTSYMBOLIC;
1633   mumps->id.n   = M;
1634   switch (mumps->id.ICNTL(18)) {
1635   case 0:  /* centralized assembled matrix input */
1636     if (!mumps->myid) {
1637       mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
1638       if (mumps->id.ICNTL(6)>1) {
1639         mumps->id.a = (MumpsScalar*)mumps->val;
1640       }
1641       if (mumps->id.ICNTL(7) == 1) { /* use user-provide matrix ordering - assuming r = c ordering */
1642         /*
1643         PetscBool      flag;
1644         ierr = ISEqual(r,c,&flag);CHKERRQ(ierr);
1645         if (!flag) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"row_perm != col_perm");
1646         ierr = ISView(r,PETSC_VIEWER_STDOUT_SELF);
1647          */
1648         if (!mumps->myid) {
1649           const PetscInt *idx;
1650           PetscInt       i,*perm_in;
1651 
1652           ierr = PetscMalloc1(M,&perm_in);CHKERRQ(ierr);
1653           ierr = ISGetIndices(r,&idx);CHKERRQ(ierr);
1654 
1655           mumps->id.perm_in = perm_in;
1656           for (i=0; i<M; i++) perm_in[i] = idx[i]+1; /* perm_in[]: start from 1, not 0! */
1657           ierr = ISRestoreIndices(r,&idx);CHKERRQ(ierr);
1658         }
1659       }
1660     }
1661     break;
1662   case 3:  /* distributed assembled matrix input (size>1) */
1663     mumps->id.nz_loc = mumps->nz;
1664     mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
1665     if (mumps->id.ICNTL(6)>1) {
1666       mumps->id.a_loc = (MumpsScalar*)mumps->val;
1667     }
1668     /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1669     ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
1670     ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
1671     ierr = VecDestroy(&b);CHKERRQ(ierr);
1672     break;
1673   }
1674   PetscMUMPS_c(mumps);
1675   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
1676 
1677   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1678   F->ops->solve           = MatSolve_MUMPS;
1679   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
1680   F->ops->matsolve        = MatMatSolve_MUMPS;
1681   F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS;
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 /* Note the Petsc r and c permutations are ignored */
1686 PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
1687 {
1688   Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
1689   PetscErrorCode ierr;
1690   Vec            b;
1691   const PetscInt M = A->rmap->N;
1692 
1693   PetscFunctionBegin;
1694   mumps->matstruc = DIFFERENT_NONZERO_PATTERN;
1695 
1696   /* Set MUMPS options from the options database */
1697   ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);
1698 
1699   ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
1700   ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);
1701 
1702   /* analysis phase */
1703   /*----------------*/
1704   mumps->id.job = JOB_FACTSYMBOLIC;
1705   mumps->id.n   = M;
1706   switch (mumps->id.ICNTL(18)) {
1707   case 0:  /* centralized assembled matrix input */
1708     if (!mumps->myid) {
1709       mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
1710       if (mumps->id.ICNTL(6)>1) {
1711         mumps->id.a = (MumpsScalar*)mumps->val;
1712       }
1713     }
1714     break;
1715   case 3:  /* distributed assembled matrix input (size>1) */
1716     mumps->id.nz_loc = mumps->nz;
1717     mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
1718     if (mumps->id.ICNTL(6)>1) {
1719       mumps->id.a_loc = (MumpsScalar*)mumps->val;
1720     }
1721     /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
1722     ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
1723     ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
1724     ierr = VecDestroy(&b);CHKERRQ(ierr);
1725     break;
1726   }
1727   PetscMUMPS_c(mumps);
1728   ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);
1729 
1730   F->ops->lufactornumeric = MatFactorNumeric_MUMPS;
1731   F->ops->solve           = MatSolve_MUMPS;
1732   F->ops->solvetranspose  = MatSolveTranspose_MUMPS;
1733   PetscFunctionReturn(0);
1734 }
1735 
/* Note the Petsc r permutation and factor info are ignored */
/*
   Symbolic (analysis) phase of the MUMPS Cholesky factorization.
   Mirrors MatLUFactorSymbolic_*MUMPS: converts A to triples, runs MUMPS
   JOB_FACTSYMBOLIC on the structure, then installs the Cholesky numeric/solve
   operations on F. Inertia computation is unavailable for complex scalars.
*/
PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F,Mat A,IS r,const MatFactorInfo *info)
{
  Mat_MUMPS      *mumps = (Mat_MUMPS*)F->data;
  PetscErrorCode ierr;
  Vec            b;
  const PetscInt M = A->rmap->N;

  PetscFunctionBegin;
  mumps->matstruc = DIFFERENT_NONZERO_PATTERN;

  /* Set MUMPS options from the options database */
  ierr = PetscSetMUMPSFromOptions(F,A);CHKERRQ(ierr);

  /* build (irn,jcn,val) triples; gather them on the master rank when PETSc OpenMP support is active */
  ierr = (*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, &mumps->nz, &mumps->irn, &mumps->jcn, &mumps->val);CHKERRQ(ierr);
  ierr = MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX,mumps);CHKERRQ(ierr);

  /* analysis phase */
  /*----------------*/
  mumps->id.job = JOB_FACTSYMBOLIC;
  mumps->id.n   = M;
  switch (mumps->id.ICNTL(18)) {
  case 0:  /* centralized assembled matrix input */
    if (!mumps->myid) {
      mumps->id.nz =mumps->nz; mumps->id.irn=mumps->irn; mumps->id.jcn=mumps->jcn;
      if (mumps->id.ICNTL(6)>1) {
        /* ICNTL(6)>1 requests scaling/permutation that needs the numerical values already at analysis */
        mumps->id.a = (MumpsScalar*)mumps->val;
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    mumps->id.nz_loc = mumps->nz;
    mumps->id.irn_loc=mumps->irn; mumps->id.jcn_loc=mumps->jcn;
    if (mumps->id.ICNTL(6)>1) {
      mumps->id.a_loc = (MumpsScalar*)mumps->val;
    }
    /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
    ierr = MatCreateVecs(A,NULL,&b);CHKERRQ(ierr);
    ierr = VecScatterCreateToZero(b,&mumps->scat_rhs,&mumps->b_seq);CHKERRQ(ierr);
    ierr = VecDestroy(&b);CHKERRQ(ierr);
    break;
  }
  PetscMUMPS_c(mumps);
  ierr = MatFactorSymbolic_MUMPS_ReportIfError(F,A,info,mumps);CHKERRQ(ierr);

  F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
  F->ops->solve                 = MatSolve_MUMPS;
  F->ops->solvetranspose        = MatSolve_MUMPS;
  F->ops->matsolve              = MatMatSolve_MUMPS;
  F->ops->mattransposesolve     = MatMatTransposeSolve_MUMPS;
#if defined(PETSC_USE_COMPLEX)
  F->ops->getinertia = NULL;
#else
  F->ops->getinertia = MatGetInertia_SBAIJMUMPS;
#endif
  PetscFunctionReturn(0);
}
1793 
1794 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer)
1795 {
1796   PetscErrorCode    ierr;
1797   PetscBool         iascii;
1798   PetscViewerFormat format;
1799   Mat_MUMPS         *mumps=(Mat_MUMPS*)A->data;
1800 
1801   PetscFunctionBegin;
1802   /* check if matrix is mumps type */
1803   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
1804 
1805   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1806   if (iascii) {
1807     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1808     if (format == PETSC_VIEWER_ASCII_INFO) {
1809       ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
1810       ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                   %d \n",mumps->id.sym);CHKERRQ(ierr);
1811       ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):            %d \n",mumps->id.par);CHKERRQ(ierr);
1812       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):         %d \n",mumps->id.ICNTL(1));CHKERRQ(ierr);
1813       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg): %d \n",mumps->id.ICNTL(2));CHKERRQ(ierr);
1814       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):   %d \n",mumps->id.ICNTL(3));CHKERRQ(ierr);
1815       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):        %d \n",mumps->id.ICNTL(4));CHKERRQ(ierr);
1816       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):         %d \n",mumps->id.ICNTL(5));CHKERRQ(ierr);
1817       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):        %d \n",mumps->id.ICNTL(6));CHKERRQ(ierr);
1818       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (sequential matrix ordering):%d \n",mumps->id.ICNTL(7));CHKERRQ(ierr);
1819       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d \n",mumps->id.ICNTL(8));CHKERRQ(ierr);
1820       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements):  %d \n",mumps->id.ICNTL(10));CHKERRQ(ierr);
1821       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):          %d \n",mumps->id.ICNTL(11));CHKERRQ(ierr);
1822       if (mumps->id.ICNTL(11)>0) {
1823         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(4) (inf norm of input mat):        %g\n",mumps->id.RINFOG(4));CHKERRQ(ierr);
1824         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(5) (inf norm of solution):         %g\n",mumps->id.RINFOG(5));CHKERRQ(ierr);
1825         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(6) (inf norm of residual):         %g\n",mumps->id.RINFOG(6));CHKERRQ(ierr);
1826         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",mumps->id.RINFOG(7),mumps->id.RINFOG(8));CHKERRQ(ierr);
1827         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(9) (error estimate):               %g \n",mumps->id.RINFOG(9));CHKERRQ(ierr);
1828         ierr = PetscViewerASCIIPrintf(viewer,"    RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",mumps->id.RINFOG(10),mumps->id.RINFOG(11));CHKERRQ(ierr);
1829       }
1830       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",mumps->id.ICNTL(12));CHKERRQ(ierr);
1831       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",mumps->id.ICNTL(13));CHKERRQ(ierr);
1832       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",mumps->id.ICNTL(14));CHKERRQ(ierr);
1833       /* ICNTL(15-17) not used */
1834       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",mumps->id.ICNTL(18));CHKERRQ(ierr);
1835       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d \n",mumps->id.ICNTL(19));CHKERRQ(ierr);
1836       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern):                         %d \n",mumps->id.ICNTL(20));CHKERRQ(ierr);
1837       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",mumps->id.ICNTL(21));CHKERRQ(ierr);
1838       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(22) (in-core/out-of-core facility):               %d \n",mumps->id.ICNTL(22));CHKERRQ(ierr);
1839       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(23) (max size of memory can be allocated locally):%d \n",mumps->id.ICNTL(23));CHKERRQ(ierr);
1840 
1841       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(24) (detection of null pivot rows):               %d \n",mumps->id.ICNTL(24));CHKERRQ(ierr);
1842       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(25) (computation of a null space basis):          %d \n",mumps->id.ICNTL(25));CHKERRQ(ierr);
1843       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(26) (Schur options for rhs or solution):          %d \n",mumps->id.ICNTL(26));CHKERRQ(ierr);
1844       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(27) (experimental parameter):                     %d \n",mumps->id.ICNTL(27));CHKERRQ(ierr);
1845       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(28) (use parallel or sequential ordering):        %d \n",mumps->id.ICNTL(28));CHKERRQ(ierr);
1846       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(29) (parallel ordering):                          %d \n",mumps->id.ICNTL(29));CHKERRQ(ierr);
1847 
1848       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(30) (user-specified set of entries in inv(A)):    %d \n",mumps->id.ICNTL(30));CHKERRQ(ierr);
1849       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(31) (factors is discarded in the solve phase):    %d \n",mumps->id.ICNTL(31));CHKERRQ(ierr);
1850       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(33) (compute determinant):                        %d \n",mumps->id.ICNTL(33));CHKERRQ(ierr);
1851       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(35) (activate BLR based factorization):           %d \n",mumps->id.ICNTL(35));CHKERRQ(ierr);
1852       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(36) (choice of BLR factorization variant):        %d \n",mumps->id.ICNTL(36));CHKERRQ(ierr);
1853       ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(38) (estimated compression rate of LU factors):   %d \n",mumps->id.ICNTL(38));CHKERRQ(ierr);
1854 
1855       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",mumps->id.CNTL(1));CHKERRQ(ierr);
1856       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",mumps->id.CNTL(2));CHKERRQ(ierr);
1857       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",mumps->id.CNTL(3));CHKERRQ(ierr);
1858       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",mumps->id.CNTL(4));CHKERRQ(ierr);
1859       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(5) (fixation for null pivots):         %g \n",mumps->id.CNTL(5));CHKERRQ(ierr);
1860       ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(7) (dropping parameter for BLR):       %g \n",mumps->id.CNTL(7));CHKERRQ(ierr);
1861 
1862       /* infomation local to each processor */
1863       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
1864       ierr = PetscViewerASCIIPushSynchronized(viewer);CHKERRQ(ierr);
1865       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %g \n",mumps->myid,mumps->id.RINFO(1));CHKERRQ(ierr);
1866       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1867       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
1868       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(2));CHKERRQ(ierr);
1869       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1870       ierr = PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
1871       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d]  %g \n",mumps->myid,mumps->id.RINFO(3));CHKERRQ(ierr);
1872       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1873 
1874       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);
1875       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] %d \n",mumps->myid,mumps->id.INFO(15));CHKERRQ(ierr);
1876       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1877 
1878       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);
1879       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(16));CHKERRQ(ierr);
1880       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1881 
1882       ierr = PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);
1883       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(23));CHKERRQ(ierr);
1884       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1885 
1886       if (mumps->ninfo && mumps->ninfo <= 80){
1887         PetscInt i;
1888         for (i=0; i<mumps->ninfo; i++){
1889           ierr = PetscViewerASCIIPrintf(viewer, "  INFO(%d): \n",mumps->info[i]);CHKERRQ(ierr);
1890           ierr = PetscViewerASCIISynchronizedPrintf(viewer,"    [%d] %d \n",mumps->myid,mumps->id.INFO(mumps->info[i]));CHKERRQ(ierr);
1891           ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1892         }
1893       }
1894       ierr = PetscViewerASCIIPopSynchronized(viewer);CHKERRQ(ierr);
1895 
1896       if (!mumps->myid) { /* information from the host */
1897         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",mumps->id.RINFOG(1));CHKERRQ(ierr);
1898         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",mumps->id.RINFOG(2));CHKERRQ(ierr);
1899         ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",mumps->id.RINFOG(3));CHKERRQ(ierr);
1900         ierr = PetscViewerASCIIPrintf(viewer,"  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n",mumps->id.RINFOG(12),mumps->id.RINFOG(13),mumps->id.INFOG(34));CHKERRQ(ierr);
1901 
1902         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(3));CHKERRQ(ierr);
1903         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",mumps->id.INFOG(4));CHKERRQ(ierr);
1904         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",mumps->id.INFOG(5));CHKERRQ(ierr);
1905         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",mumps->id.INFOG(6));CHKERRQ(ierr);
1906         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively use after analysis): %d \n",mumps->id.INFOG(7));CHKERRQ(ierr);
1907         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",mumps->id.INFOG(8));CHKERRQ(ierr);
1908         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",mumps->id.INFOG(9));CHKERRQ(ierr);
1909         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d \n",mumps->id.INFOG(10));CHKERRQ(ierr);
1910         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",mumps->id.INFOG(11));CHKERRQ(ierr);
1911         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",mumps->id.INFOG(12));CHKERRQ(ierr);
1912         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",mumps->id.INFOG(13));CHKERRQ(ierr);
1913         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",mumps->id.INFOG(14));CHKERRQ(ierr);
1914         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",mumps->id.INFOG(15));CHKERRQ(ierr);
1915         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",mumps->id.INFOG(16));CHKERRQ(ierr);
1916         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",mumps->id.INFOG(17));CHKERRQ(ierr);
1917         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",mumps->id.INFOG(18));CHKERRQ(ierr);
1918         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",mumps->id.INFOG(19));CHKERRQ(ierr);
1919         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",mumps->id.INFOG(20));CHKERRQ(ierr);
1920         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",mumps->id.INFOG(21));CHKERRQ(ierr);
1921         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",mumps->id.INFOG(22));CHKERRQ(ierr);
1922         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",mumps->id.INFOG(23));CHKERRQ(ierr);
1923         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",mumps->id.INFOG(24));CHKERRQ(ierr);
1924         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",mumps->id.INFOG(25));CHKERRQ(ierr);
1925         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(28) (after factorization: number of null pivots encountered): %d\n",mumps->id.INFOG(28));CHKERRQ(ierr);
1926         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n",mumps->id.INFOG(29));CHKERRQ(ierr);
1927         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n",mumps->id.INFOG(30),mumps->id.INFOG(31));CHKERRQ(ierr);
1928         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(32) (after analysis: type of analysis done): %d\n",mumps->id.INFOG(32));CHKERRQ(ierr);
1929         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(33) (value used for ICNTL(8)): %d\n",mumps->id.INFOG(33));CHKERRQ(ierr);
1930         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(34) (exponent of the determinant if determinant is requested): %d\n",mumps->id.INFOG(34));CHKERRQ(ierr);
1931         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n",mumps->id.INFOG(35));CHKERRQ(ierr);
1932         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(36));CHKERRQ(ierr);
1933         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d \n",mumps->id.INFOG(37));CHKERRQ(ierr);
1934         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d \n",mumps->id.INFOG(38));CHKERRQ(ierr);
1935         ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d \n",mumps->id.INFOG(39));CHKERRQ(ierr);
1936       }
1937     }
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 PetscErrorCode MatGetInfo_MUMPS(Mat A,MatInfoType flag,MatInfo *info)
1943 {
1944   Mat_MUMPS *mumps =(Mat_MUMPS*)A->data;
1945 
1946   PetscFunctionBegin;
1947   info->block_size        = 1.0;
1948   info->nz_allocated      = mumps->id.INFOG(20);
1949   info->nz_used           = mumps->id.INFOG(20);
1950   info->nz_unneeded       = 0.0;
1951   info->assemblies        = 0.0;
1952   info->mallocs           = 0.0;
1953   info->memory            = 0.0;
1954   info->fill_ratio_given  = 0;
1955   info->fill_ratio_needed = 0;
1956   info->factor_mallocs    = 0;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 /* -------------------------------------------------------------------------------------------*/
/* Select the variables (index set is) on which MUMPS computes a Schur complement.
   Creates F->schur as a size x size SEQDENSE matrix whose array MUMPS fills in
   directly, and records the (1-based) variable list plus the requested return
   layout (ICNTL(19)) in the MUMPS control structure. Only the root rank may pass
   a non-empty index set when running in parallel. */
PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS         *mumps =(Mat_MUMPS*)F->data;
  const PetscScalar *arr;
  const PetscInt    *idxs;
  PetscInt          size,i;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  ierr = ISGetLocalSize(is,&size);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    PetscBool ls,gs; /* gs is false if any rank other than root has non-empty IS */

    ls   = mumps->myid ? (size ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */
    ierr = MPI_Allreduce(&ls,&gs,1,MPIU_BOOL,MPI_LAND,mumps->petsc_comm);CHKERRQ(ierr);
    if (!gs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MUMPS distributed parallel Schur complements not yet supported from PETSc\n");
  }

  /* Schur complement matrix: sequential dense buffer that MUMPS writes into directly */
  ierr = MatDestroy(&F->schur);CHKERRQ(ierr);
  ierr = MatCreateSeqDense(PETSC_COMM_SELF,size,size,NULL,&F->schur);CHKERRQ(ierr);
  ierr = MatDenseGetArrayRead(F->schur,&arr);CHKERRQ(ierr);
  mumps->id.schur      = (MumpsScalar*)arr; /* aliases the dense array of F->schur */
  mumps->id.size_schur = size;
  mumps->id.schur_lld  = size; /* leading dimension equals the Schur size (contiguous storage) */
  ierr = MatDenseRestoreArrayRead(F->schur,&arr);CHKERRQ(ierr);
  if (mumps->sym == 1) {
    ierr = MatSetOption(F->schur,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr);
  }

  /* MUMPS expects Fortran style indices */
  ierr = PetscFree(mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = PetscMalloc1(size,&mumps->id.listvar_schur);CHKERRQ(ierr);
  ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr);
  ierr = PetscArraycpy(mumps->id.listvar_schur,idxs,size);CHKERRQ(ierr);
  for (i=0;i<size;i++) mumps->id.listvar_schur[i]++; /* shift the 0-based PETSc indices to 1-based Fortran indices */
  ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr);
  if (mumps->petsc_size > 1) {
    mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */
  } else {
    if (F->factortype == MAT_FACTOR_LU) {
      mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */
    } else {
      mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */
    }
  }
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  mumps->id.ICNTL(26) = -1;
  PetscFunctionReturn(0);
}
2011 
2012 /* -------------------------------------------------------------------------------------------*/
/* Copy the Schur complement computed by MUMPS (held in mumps->id.schur, in the storage
   layout selected by ICNTL(19)) into a newly created size_schur x size_schur SEQDENSE
   matrix *S, using PETSc's column-major dense storage. The caller owns *S. */
PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F,Mat* S)
{
  Mat            St;
  Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
  PetscScalar    *array;
#if defined(PETSC_USE_COMPLEX)
  PetscScalar    im = PetscSqrtScalar((PetscScalar)-1.0); /* imaginary unit, used to rebuild PetscScalar from MUMPS' {r,i} pairs */
#endif
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ORDER,"Schur complement mode not selected! You should call MatFactorSetSchurIS to enable it");
  ierr = MatCreate(PETSC_COMM_SELF,&St);CHKERRQ(ierr);
  ierr = MatSetSizes(St,PETSC_DECIDE,PETSC_DECIDE,mumps->id.size_schur,mumps->id.size_schur);CHKERRQ(ierr);
  ierr = MatSetType(St,MATDENSE);CHKERRQ(ierr);
  ierr = MatSetUp(St);CHKERRQ(ierr);
  ierr = MatDenseGetArray(St,&array);CHKERRQ(ierr);
  if (!mumps->sym) { /* MUMPS always return a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[j*N+i] = val; /* transpose: MUMPS row-major -> PETSc column-major */
        }
      }
    } else { /* stored by columns */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    }
  } else { /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=i;j<N;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val; /* mirror each entry into both triangles of the symmetric result */
          array[j*N+i] = val;
        }
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      ierr = PetscArraycpy(array,mumps->id.schur,mumps->id.size_schur*mumps->id.size_schur);CHKERRQ(ierr);
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      PetscInt i,j,N=mumps->id.size_schur;
      for (i=0;i<N;i++) {
        for (j=0;j<i+1;j++) {
#if !defined(PETSC_USE_COMPLEX)
          PetscScalar val = mumps->id.schur[i*N+j];
#else
          PetscScalar val = mumps->id.schur[i*N+j].r + im*mumps->id.schur[i*N+j].i;
#endif
          array[i*N+j] = val; /* mirror each entry into both triangles of the symmetric result */
          array[j*N+i] = val;
        }
      }
    }
  }
  ierr = MatDenseRestoreArray(St,&array);CHKERRQ(ierr);
  *S   = St;
  PetscFunctionReturn(0);
}
2081 
2082 /* -------------------------------------------------------------------------------------------*/
2083 PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt ival)
2084 {
2085   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2086 
2087   PetscFunctionBegin;
2088   mumps->id.ICNTL(icntl) = ival;
2089   PetscFunctionReturn(0);
2090 }
2091 
2092 PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F,PetscInt icntl,PetscInt *ival)
2093 {
2094   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2095 
2096   PetscFunctionBegin;
2097   *ival = mumps->id.ICNTL(icntl);
2098   PetscFunctionReturn(0);
2099 }
2100 
/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array ICNTL() (1-based, as in the MUMPS documentation)
-  ival - value of MUMPS ICNTL(icntl)

  Options Database:
.   -mat_mumps_icntl_<icntl> <ival>

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F,PetscInt icntl,PetscInt ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveInt(F,ival,3);
  ierr = PetscTryMethod(F,"MatMumpsSetIcntl_C",(Mat,PetscInt,PetscInt),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2133 
/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array ICNTL() (1-based, as in the MUMPS documentation)

  Output Parameter:
.  ival - value of MUMPS ICNTL(icntl)

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetIcntl_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2165 
2166 /* -------------------------------------------------------------------------------------------*/
2167 PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal val)
2168 {
2169   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2170 
2171   PetscFunctionBegin;
2172   mumps->id.CNTL(icntl) = val;
2173   PetscFunctionReturn(0);
2174 }
2175 
2176 PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F,PetscInt icntl,PetscReal *val)
2177 {
2178   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2179 
2180   PetscFunctionBegin;
2181   *val = mumps->id.CNTL(icntl);
2182   PetscFunctionReturn(0);
2183 }
2184 
/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
.  icntl - index of MUMPS parameter array CNTL() (1-based, as in the MUMPS documentation)
-  val - value of MUMPS CNTL(icntl)

  Options Database:
.   -mat_mumps_cntl_<icntl> <val>

   Level: beginner

   References:
.     MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsSetCntl(Mat F,PetscInt icntl,PetscReal val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidLogicalCollectiveReal(F,val,3);
  ierr = PetscTryMethod(F,"MatMumpsSetCntl_C",(Mat,PetscInt,PetscReal),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2217 
/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array CNTL() (1-based, as in the MUMPS documentation)

  Output Parameter:
.  val - value of MUMPS CNTL(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetCntl(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidLogicalCollectiveInt(F,icntl,2);
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetCntl_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2249 
2250 PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F,PetscInt icntl,PetscInt *info)
2251 {
2252   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2253 
2254   PetscFunctionBegin;
2255   *info = mumps->id.INFO(icntl);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F,PetscInt icntl,PetscInt *infog)
2260 {
2261   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2262 
2263   PetscFunctionBegin;
2264   *infog = mumps->id.INFOG(icntl);
2265   PetscFunctionReturn(0);
2266 }
2267 
2268 PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfo)
2269 {
2270   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2271 
2272   PetscFunctionBegin;
2273   *rinfo = mumps->id.RINFO(icntl);
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F,PetscInt icntl,PetscReal *rinfog)
2278 {
2279   Mat_MUMPS *mumps =(Mat_MUMPS*)F->data;
2280 
2281   PetscFunctionBegin;
2282   *rinfog = mumps->id.RINFOG(icntl);
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F,Mat spRHS)
2287 {
2288   PetscErrorCode ierr;
2289   Mat            Bt = NULL,Btseq = NULL;
2290   PetscBool      flg;
2291   Mat_MUMPS      *mumps =(Mat_MUMPS*)F->data;
2292   PetscScalar    *aa;
2293   PetscInt       spnr,*ia,*ja,M,nrhs;
2294 
2295   PetscFunctionBegin;
2296   PetscValidIntPointer(spRHS,2);
2297   ierr = PetscObjectTypeCompare((PetscObject)spRHS,MATTRANSPOSEMAT,&flg);CHKERRQ(ierr);
2298   if (flg) {
2299     ierr = MatTransposeGetMat(spRHS,&Bt);CHKERRQ(ierr);
2300   } else SETERRQ(PetscObjectComm((PetscObject)spRHS),PETSC_ERR_ARG_WRONG,"Matrix spRHS must be type MATTRANSPOSEMAT matrix");
2301 
2302   ierr = MatMumpsSetIcntl(F,30,1);CHKERRQ(ierr);
2303 
2304   if (mumps->petsc_size > 1) {
2305     Mat_MPIAIJ *b = (Mat_MPIAIJ*)Bt->data;
2306     Btseq = b->A;
2307   } else {
2308     Btseq = Bt;
2309   }
2310 
2311   ierr = MatGetSize(spRHS,&M,&nrhs);CHKERRQ(ierr);
2312   mumps->id.nrhs = nrhs;
2313   mumps->id.lrhs = M;
2314   mumps->id.rhs  = NULL;
2315 
2316   if (!mumps->myid) {
2317     ierr = MatSeqAIJGetArray(Btseq,&aa);CHKERRQ(ierr);
2318     ierr = MatGetRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2319     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2320 
2321     mumps->id.irhs_ptr    = ia;
2322     mumps->id.irhs_sparse = ja;
2323     mumps->id.nz_rhs      = ia[spnr] - 1;
2324     mumps->id.rhs_sparse  = (MumpsScalar*)aa;
2325   } else {
2326     mumps->id.irhs_ptr    = NULL;
2327     mumps->id.irhs_sparse = NULL;
2328     mumps->id.nz_rhs      = 0;
2329     mumps->id.rhs_sparse  = NULL;
2330   }
2331   mumps->id.ICNTL(20)   = 1; /* rhs is sparse */
2332   mumps->id.ICNTL(21)   = 0; /* solution is in assembled centralized format */
2333 
2334   /* solve phase */
2335   /*-------------*/
2336   mumps->id.job = JOB_SOLVE;
2337   PetscMUMPS_c(mumps);
2338   if (mumps->id.INFOG(1) < 0)
2339     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d INFO(2)=%d\n",mumps->id.INFOG(1),mumps->id.INFO(2));
2340 
2341   if (!mumps->myid) {
2342     ierr = MatSeqAIJRestoreArray(Btseq,&aa);CHKERRQ(ierr);
2343     ierr = MatRestoreRowIJ(Btseq,1,PETSC_FALSE,PETSC_FALSE,&spnr,(const PetscInt**)&ia,(const PetscInt**)&ja,&flg);CHKERRQ(ierr);
2344     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot get IJ structure");
2345   }
2346   PetscFunctionReturn(0);
2347 }
2348 
/*@
  MatMumpsGetInverse - Get user-specified set of entries in inverse of A

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  spRHS - sequential sparse matrix in MATTRANSPOSEMAT format holding specified indices in processor[0]

  Output Parameter:
. spRHS - requested entries of inverse of A

   Notes:
    The entries of inv(A) to compute are selected by the nonzero pattern of spRHS; the
    MUMPS A^-1 functionality ICNTL(30) is used (see the MUMPS Users' Guide)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose()
@*/
PetscErrorCode MatMumpsGetInverse(Mat F,Mat spRHS)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  ierr = PetscUseMethod(F,"MatMumpsGetInverse_C",(Mat,Mat),(F,spRHS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2378 
2379 PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F,Mat spRHST)
2380 {
2381   PetscErrorCode ierr;
2382   Mat            spRHS;
2383 
2384   PetscFunctionBegin;
2385   ierr = MatCreateTranspose(spRHST,&spRHS);CHKERRQ(ierr);
2386   ierr = MatMumpsGetInverse_MUMPS(F,spRHS);CHKERRQ(ierr);
2387   ierr = MatDestroy(&spRHS);CHKERRQ(ierr);
2388   PetscFunctionReturn(0);
2389 }
2390 
/*@
  MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix A^T

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix of A obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  spRHST - sequential sparse matrix in MATAIJ format holding specified indices of A^T in processor[0]

  Output Parameter:
. spRHST - requested entries of inverse of A^T

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatCreateTranspose(), MatMumpsGetInverse()
@*/
PetscErrorCode MatMumpsGetInverseTranspose(Mat F,Mat spRHST)
{
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  /* spRHST must be AIJ (sequential or parallel); the plug-in wraps it as a MATTRANSPOSEMAT */
  ierr = PetscObjectTypeCompareAny((PetscObject)spRHST,&flg,MATSEQAIJ,MATMPIAIJ,NULL);CHKERRQ(ierr);
  if (!flg) SETERRQ(PetscObjectComm((PetscObject)spRHST),PETSC_ERR_ARG_WRONG,"Matrix spRHST must be MATAIJ matrix");

  ierr = PetscUseMethod(F,"MatMumpsGetInverseTranspose_C",(Mat,Mat),(F,spRHST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2424 
/*@
  MatMumpsGetInfo - Get MUMPS parameter INFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFO() (1-based, as in the MUMPS documentation)

  Output Parameter:
.  ival - value of MUMPS INFO(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfo(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfo_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2455 
/*@
  MatMumpsGetInfog - Get MUMPS parameter INFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array INFOG() (1-based, as in the MUMPS documentation)

  Output Parameter:
.  ival - value of MUMPS INFOG(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetInfog(Mat F,PetscInt icntl,PetscInt *ival)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidIntPointer(ival,3);
  ierr = PetscUseMethod(F,"MatMumpsGetInfog_C",(Mat,PetscInt,PetscInt*),(F,icntl,ival));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2486 
/*@
  MatMumpsGetRinfo - Get MUMPS parameter RINFO()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFO() (1-based, as in the MUMPS documentation)

  Output Parameter:
.  val - value of MUMPS RINFO(icntl)

   Level: beginner

   References:
.       MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfo(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfo_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2517 
/*@
  MatMumpsGetRinfog - Get MUMPS parameter RINFOG()

   Logically Collective on Mat

   Input Parameters:
+  F - the factored matrix obtained by calling MatGetFactor() from PETSc-MUMPS interface
-  icntl - index of MUMPS parameter array RINFOG() (1-based, as in the MUMPS documentation)

  Output Parameter:
.  val - value of MUMPS RINFOG(icntl)

   Level: beginner

   References:
.      MUMPS Users' Guide

.seealso: MatGetFactor(), MatMumpsSetIcntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog()
@*/
PetscErrorCode MatMumpsGetRinfog(Mat F,PetscInt icntl,PetscReal *val)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidType(F,1);
  if (!F->factortype) SETERRQ(PetscObjectComm((PetscObject)F),PETSC_ERR_ARG_WRONGSTATE,"Only for factored matrix");
  PetscValidRealPointer(val,3);
  ierr = PetscUseMethod(F,"MatMumpsGetRinfog_C",(Mat,PetscInt,PetscReal*),(F,icntl,val));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2548 
2549 /*MC
2550   MATSOLVERMUMPS -  A matrix type providing direct solvers (LU and Cholesky) for
2551   distributed and sequential matrices via the external package MUMPS.
2552 
2553   Works with MATAIJ and MATSBAIJ matrices
2554 
2555   Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS
2556 
2557   Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. See details below.
2558 
2559   Use -pc_type cholesky or lu -pc_factor_mat_solver_type mumps to use this direct solver
2560 
2561   Options Database Keys:
2562 +  -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages
2563 .  -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning
2564 .  -mat_mumps_icntl_3 -  ICNTL(3): output stream for global information, collected on the host
2565 .  -mat_mumps_icntl_4 -  ICNTL(4): level of printing (0 to 4)
2566 .  -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)
2567 .  -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis (0 to 7). 3=Scotch, 4=PORD, 5=Metis
2568 .  -mat_mumps_icntl_8  - ICNTL(8): scaling strategy (-2 to 8 or 77)
2569 .  -mat_mumps_icntl_10  - ICNTL(10): max num of refinements
2570 .  -mat_mumps_icntl_11  - ICNTL(11): statistics related to an error analysis (via -ksp_view)
2571 .  -mat_mumps_icntl_12  - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)
2572 .  -mat_mumps_icntl_13  - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting
2573 .  -mat_mumps_icntl_14  - ICNTL(14): percentage increase in the estimated working space
2574 .  -mat_mumps_icntl_19  - ICNTL(19): computes the Schur complement
2575 .  -mat_mumps_icntl_22  - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)
.  -mat_mumps_icntl_23  - ICNTL(23): max size of the working memory (MB) that MUMPS can allocate per processor
2577 .  -mat_mumps_icntl_24  - ICNTL(24): detection of null pivot rows (0 or 1)
2578 .  -mat_mumps_icntl_25  - ICNTL(25): compute a solution of a deficient matrix and a null space basis
.  -mat_mumps_icntl_26  - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed
.  -mat_mumps_icntl_28  - ICNTL(28): use 1 for sequential analysis and icntl(7) ordering, or 2 for parallel analysis and icntl(29) ordering
2581 .  -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis
2582 .  -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A)
2583 .  -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization
2584 .  -mat_mumps_icntl_33 - ICNTL(33): compute determinant
2585 .  -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature
2586 .  -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant
2587 .  -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR
2588 .  -mat_mumps_cntl_1  - CNTL(1): relative pivoting threshold
2589 .  -mat_mumps_cntl_2  -  CNTL(2): stopping criterion of refinement
2590 .  -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold
2591 .  -mat_mumps_cntl_4 - CNTL(4): value for static pivoting
2592 .  -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots
2593 .  -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization
2594 -  -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS.
2595                                    Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual.
2596 
2597   Level: beginner
2598 
2599     Notes:
2600     MUMPS Cholesky does not handle (complex) Hermitian matrices http://mumps.enseeiht.fr/doc/userguide_5.2.1.pdf so using it will error if the matrix is Hermitian.
2601 
2602     When a MUMPS factorization fails inside a KSP solve, for example with a KSP_DIVERGED_PC_FAILED, one can find the MUMPS information about the failure by calling
2603 $          KSPGetPC(ksp,&pc);
2604 $          PCFactorGetMatrix(pc,&mat);
2605 $          MatMumpsGetInfo(mat,....);
2606 $          MatMumpsGetInfog(mat,....); etc.
2607            Or you can run with -ksp_error_if_not_converged and the program will be stopped and the information printed in the error message.
2608 
2609    Two modes to run MUMPS/PETSc with OpenMP
2610 
2611 $     Set OMP_NUM_THREADS and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP
2612 $     threads per rank, then you may use "export OMP_NUM_THREADS=16 && mpirun -n 4 ./test".
2613 
2614 $     -mat_mumps_use_omp_threads [m] and run your code with as many MPI ranks as the number of cores. For example,
2615 $     if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16"
2616 
2617    To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part
2618    (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with --with-openmp --download-hwloc
2619    (or --with-hwloc), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS
2620    libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS
   (PETSc will automatically try to utilize a threaded BLAS if --with-openmp is provided).
2622 
2623    If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI
2624    processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of
2625    size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm
2626    are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set
2627    by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs.
2628    In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets,
2629    if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind
2630    MPI ranks to cores, then with -mat_mumps_use_omp_threads 16, a master rank (and threads it spawns) will use half cores in socket 0, and half
2631    cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the
2632    problem will not happen. Therefore, when you use -mat_mumps_use_omp_threads, you need to keep an eye on your MPI rank mapping and CPU binding.
2633    For example, with the Slurm job scheduler, one can use srun --cpu-bind=verbose -m block:block to map consecutive MPI ranks to sockets and
2634    examine the mapping result.
2635 
2636    PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set OMP_PROC_BIND and OMP_PLACES in job scripts,
2637    for example, export OMP_PLACES=threads and export OMP_PROC_BIND=spread. One does not need to export OMP_NUM_THREADS=m in job scripts as PETSc
2638    calls omp_set_num_threads(m) internally before calling MUMPS.
2639 
2640    References:
2641 +   1. - Heroux, Michael A., R. Brightwell, and Michael M. Wolf. "Bi-modal MPI and MPI+ threads computing on scalable multicore systems." IJHPCA (Submitted) (2011).
2642 -   2. - Gutierrez, Samuel K., et al. "Accommodating Thread-Level Heterogeneity in Coupled Parallel Applications." Parallel and Distributed Processing Symposium (IPDPS), 2017 IEEE International. IEEE, 2017.
2643 
2644 .seealso: PCFactorSetMatSolverType(), MatSolverType, MatMumpsSetICntl(), MatMumpsGetIcntl(), MatMumpsSetCntl(), MatMumpsGetCntl(), MatMumpsGetInfo(), MatMumpsGetInfog(), MatMumpsGetRinfo(), MatMumpsGetRinfog(), KSPGetPC(), PCGetFactor(), PCFactorGetMatrix()
2645 
2646 M*/
2647 
/* Query function composed on every MUMPS factor matrix as "MatFactorGetSolverType_C";
   reports the solver package backing the factor (always MATSOLVERMUMPS here). */
static PetscErrorCode MatFactorGetSolverType_mumps(Mat A,MatSolverType *type)
{
  PetscFunctionBegin;
  *type = MATSOLVERMUMPS;
  PetscFunctionReturn(0);
}
2654 
/* MatGetFactor for Seq and MPI AIJ matrices

   Creates the factor matrix F of type "mumps" that will hold the MUMPS LU or Cholesky
   factorization of A. The factor shares A's layout, carries a Mat_MUMPS context as its
   data, and exposes the MUMPS-specific accessors (Icntl/Cntl/Info/Rinfo/Schur/Inverse)
   via composed functions so they can be reached through PCFactorGetMatrix(). */
static PetscErrorCode MatGetFactor_aij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqAIJ;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  /* MUMPS does not factor Hermitian (non-symmetric) complex matrices; only Cholesky is affected */
  if (A->hermitian && !A->symmetric && ftype == MAT_FACTOR_CHOLESKY) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  /* Create the factorization matrix */
  ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isSeqAIJ);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view    = MatView_MUMPS;
  B->ops->getinfo = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/control functions onto the factor object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* LU path: AIJ triples are passed to MUMPS as a general (unsymmetric) matrix */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij;
    mumps->sym = 0;
  } else {
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
    B->factortype                  = MAT_FACTOR_CHOLESKY;
    /* Cholesky path: only the upper-triangular (sbaij-style) part is fed to MUMPS */
    if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij;
    else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij;
#if defined(PETSC_USE_COMPLEX)
    mumps->sym = 2;
#else
    /* sym follows the MUMPS SYM convention: 1 = symmetric positive definite, 2 = general symmetric */
    if (A->spd_set && A->spd) mumps->sym = 1;
    else                      mumps->sym = 2;
#endif
  }

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
2724 
/* MatGetFactor for Seq and MPI SBAIJ matrices

   Creates the factor matrix F of type "mumps" holding a MUMPS Cholesky factorization
   of the symmetric A (SBAIJ supports Cholesky only, so ftype is not inspected).
   Mirrors MatGetFactor_aij_mumps: same composed accessors, same Mat_MUMPS context. */
static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSBAIJ;

  PetscFunctionBegin;
 #if defined(PETSC_USE_COMPLEX)
  /* MUMPS does not factor Hermitian (non-symmetric) complex matrices */
  if (A->hermitian && !A->symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Hermitian CHOLESKY Factor is not supported");
 #endif
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
  /* SBAIJ already stores only the upper triangle, so the triples conversion is direct */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSBAIJ,&isSeqSBAIJ);CHKERRQ(ierr);
  if (isSeqSBAIJ) {
    mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij;
  } else {
    mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij;
  }

  B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS;
  B->ops->view                   = MatView_MUMPS;
  B->ops->getinfo                = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/control functions onto the factor object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);

  B->factortype = MAT_FACTOR_CHOLESKY;
#if defined(PETSC_USE_COMPLEX)
  mumps->sym = 2;
#else
  /* sym follows the MUMPS SYM convention: 1 = symmetric positive definite, 2 = general symmetric */
  if (A->spd_set && A->spd) mumps->sym = 1;
  else                      mumps->sym = 2;
#endif

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
2788 
2789 static PetscErrorCode MatGetFactor_baij_mumps(Mat A,MatFactorType ftype,Mat *F)
2790 {
2791   Mat            B;
2792   PetscErrorCode ierr;
2793   Mat_MUMPS      *mumps;
2794   PetscBool      isSeqBAIJ;
2795 
2796   PetscFunctionBegin;
2797   /* Create the factorization matrix */
2798   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQBAIJ,&isSeqBAIJ);CHKERRQ(ierr);
2799   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
2800   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
2801   ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
2802   ierr = MatSetUp(B);CHKERRQ(ierr);
2803 
2804   ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);
2805   if (ftype == MAT_FACTOR_LU) {
2806     B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS;
2807     B->factortype            = MAT_FACTOR_LU;
2808     if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij;
2809     else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij;
2810     mumps->sym = 0;
2811   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead\n");
2812 
2813   B->ops->view        = MatView_MUMPS;
2814   B->ops->getinfo     = MatGetInfo_MUMPS;
2815 
2816   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
2817   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
2818   ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
2819   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
2820   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
2821   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
2822   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
2823   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
2824   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
2825   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
2826   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);
2827   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverse_C",MatMumpsGetInverse_MUMPS);CHKERRQ(ierr);
2828   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInverseTranspose_C",MatMumpsGetInverseTranspose_MUMPS);CHKERRQ(ierr);
2829 
2830   /* set solvertype */
2831   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
2832   ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);
2833 
2834   B->ops->destroy = MatDestroy_MUMPS;
2835   B->data         = (void*)mumps;
2836 
2837   ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);
2838 
2839   *F = B;
2840   PetscFunctionReturn(0);
2841 }
2842 
/* MatGetFactor for Seq and MPI SELL matrices

   Creates the factor matrix F of type "mumps" for an LU factorization of a
   sequential SELL matrix. MPI SELL and Cholesky are not implemented yet.
   NOTE(review): unlike the AIJ/BAIJ/SBAIJ factories above, the GetInverse/
   GetInverseTranspose functions are not composed here — presumably intentional
   while SELL support is partial; confirm before relying on them. */
static PetscErrorCode MatGetFactor_sell_mumps(Mat A,MatFactorType ftype,Mat *F)
{
  Mat            B;
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps;
  PetscBool      isSeqSELL;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQSELL,&isSeqSELL);CHKERRQ(ierr);
  ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N);CHKERRQ(ierr);
  ierr = PetscStrallocpy("mumps",&((PetscObject)B)->type_name);CHKERRQ(ierr);
  ierr = MatSetUp(B);CHKERRQ(ierr);

  ierr = PetscNewLog(B,&mumps);CHKERRQ(ierr);

  B->ops->view        = MatView_MUMPS;
  B->ops->getinfo     = MatGetInfo_MUMPS;

  /* Compose the MUMPS-specific query/control functions onto the factor object */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_mumps);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorSetSchurIS_C",MatFactorSetSchurIS_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorCreateSchurComplement_C",MatFactorCreateSchurComplement_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetIcntl_C",MatMumpsSetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetIcntl_C",MatMumpsGetIcntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsSetCntl_C",MatMumpsSetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetCntl_C",MatMumpsGetCntl_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfo_C",MatMumpsGetInfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetInfog_C",MatMumpsGetInfog_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfo_C",MatMumpsGetRinfo_MUMPS);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMumpsGetRinfog_C",MatMumpsGetRinfog_MUMPS);CHKERRQ(ierr);

  if (ftype == MAT_FACTOR_LU) {
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS;
    B->factortype            = MAT_FACTOR_LU;
    /* only sequential SELL has a triples conversion so far */
    if (isSeqSELL) mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij;
    else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");
    mumps->sym = 0;
  } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"To be implemented");

  /* set solvertype */
  ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
  ierr = PetscStrallocpy(MATSOLVERMUMPS,&B->solvertype);CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MUMPS;
  B->data         = (void*)mumps;

  ierr = PetscInitializeMUMPS(A,mumps);CHKERRQ(ierr);

  *F = B;
  PetscFunctionReturn(0);
}
2896 
2897 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void)
2898 {
2899   PetscErrorCode ierr;
2900 
2901   PetscFunctionBegin;
2902   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
2903   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
2904   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
2905   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPIBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
2906   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATMPISBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
2907   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_mumps);CHKERRQ(ierr);
2908   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_mumps);CHKERRQ(ierr);
2909   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_LU,MatGetFactor_baij_mumps);CHKERRQ(ierr);
2910   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_baij_mumps);CHKERRQ(ierr);
2911   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSBAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
2912   ierr = MatSolverTypeRegister(MATSOLVERMUMPS,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_sell_mumps);CHKERRQ(ierr);
2913   PetscFunctionReturn(0);
2914 }
2915 
2916