xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision b52f573bf7dd30aec2f45ea71ed0f119e1fe824d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Provides an interface to the MUMPS sparse solver
5 */
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "zmumps_c.h"
14 #else
15 #include "dmumps_c.h"
16 #endif
17 EXTERN_C_END
18 #define JOB_INIT -1
19 #define JOB_END -2
20 /* macros s.t. indices match MUMPS documentation */
21 #define ICNTL(I) icntl[(I)-1]
22 #define CNTL(I) cntl[(I)-1]
23 #define INFOG(I) infog[(I)-1]
24 #define INFO(I) info[(I)-1]
25 #define RINFOG(I) rinfog[(I)-1]
26 #define RINFO(I) rinfo[(I)-1]
27 
/*
   Private data attached to a MUMPS factor matrix (stored in Mat->spptr).
   Holds the MUMPS solver handle, the matrix-in-triples buffers, the
   communication objects for centralized rhs / distributed solution, and
   the original Mat function pointers so the matrix can be converted back.
*/
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;             /* MUMPS handle + ICNTL/CNTL/INFO arrays (complex build) */
#else
  DMUMPS_STRUC_C id;             /* MUMPS handle + ICNTL/CNTL/INFO arrays (real build) */
#endif
  MatStructure   matstruc;       /* DIFFERENT_NONZERO_PATTERN forces a fresh analysis phase */
  PetscMPIInt    myid,size;      /* rank and size of the matrix communicator */
  PetscInt       *irn,*jcn,sym,nSolve; /* triple row/col indices; MUMPS SYM; #solves since factorization */
  PetscScalar    *val;           /* triple values handed to MUMPS */
  MPI_Comm       comm_mumps;     /* duplicated communicator passed to MUMPS */
  VecScatter     scat_rhs, scat_sol; /* scatter rhs to host / distributed solution back to x */
  PetscTruth     isAIJ,CleanUpMUMPS; /* AIJ vs SBAIJ input; whether MUMPS state needs JOB_END */
  Vec            b_seq,x_seq;    /* host-side sequential rhs; local piece of MUMPS solution */
  /* original function pointers, restored by MatConvert_MUMPS_Base() */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
} Mat_MUMPS;
51 
52 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
53 EXTERN_C_BEGIN
54 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
55 EXTERN_C_END
56 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
57 /*
58   input:
59     A       - matrix in mpiaij or mpisbaij (bs=1) format
60     shift   - 0: C style output triple; 1: Fortran style output triple.
61     valOnly - FALSE: spaces are allocated and values are set for the triple
62               TRUE:  only the values in v array are updated
63   output:
64     nnz     - dim of r, c, and v (number of local nonzero entries of A)
65     r, c, v - row and col index, matrix values (matrix triples)
66  */
PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
  PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
  PetscErrorCode ierr;
  PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
  PetscInt       *row,*col;
  PetscScalar    *av, *bv,*val;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  /* Pull out the CSR arrays of the diagonal block (A) and off-diagonal block (B)
     of the parallel matrix; garray maps B's compressed column indices to global
     column numbers. */
  if (mumps->isAIJ){
    Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
    Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
    Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;
    av=aa->a; bv=bb->a;

  } else {
    Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
    Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
    Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
    /* only block size 1 is handled below (scalar indexing of aj/bj) */
    if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;
    av=aa->a; bv=bb->a;
  }

  /* First call allocates the triples; subsequent calls (valOnly) reuse the
     caller-held arrays and only refresh the numerical values. */
  if (!valOnly){
    ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }
  *nnz = nz;

  jj = 0; irow = rstart;
  for ( i=0; i<m; i++ ) {
    ajj = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj = bj + bi[i];

    /* get jB, the starting local col index for the 2nd B-part */
    /* NOTE(review): ajj[0] is read even when countA == 0 (empty diagonal-block
       row) -- presumably rows always have a diagonal entry here; verify. */
    colA_start = rstart + ajj[0]; /* the smallest col index for A */
    j=-1;
    do {
      j++;
      if (j == countB) break;
      jcol = garray[bjj[j]];
    } while (jcol < colA_start);
    jB = j;     /* first jB entries of B have global column < colA_start */

    /* B-part, smaller col index */
    colA_start = rstart + ajj[0]; /* the smallest col index for A */
    for (j=0; j<jB; j++){
      jcol = garray[bjj[j]];
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = jcol + shift;

      }
      val[jj++] = *bv++;
    }
    /* A-part */
    for (j=0; j<countA; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = *av++;
    }
    /* B-part, larger col index */
    for (j=jB; j<countB; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = *bv++;
    }
    irow++;
  }

  PetscFunctionReturn(0);
}
152 
153 EXTERN_C_BEGIN
154 #undef __FUNCT__
155 #define __FUNCT__ "MatConvert_MUMPS_Base"
/*
   Converts a MUMPS matrix back to its underlying base type: restores the
   saved Mat function pointers and removes the MatConvert_* hooks that the
   MUMPS wrappers composed onto the object.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B=*newmat;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
  void           (*f)(void);

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* restore the function pointers saved when the matrix became a MUMPS type */
  B->ops->duplicate              = mumps->MatDuplicate;
  B->ops->view                   = mumps->MatView;
  B->ops->assemblyend            = mumps->MatAssemblyEnd;
  B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
  B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
  B->ops->destroy                = mumps->MatDestroy;

  /* put back original composed preallocation function */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
  if (f) {
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr);
  }

  /* drop all MUMPS conversion hooks (composing PETSC_NULL removes them) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
193 EXTERN_C_END
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatDestroy_MUMPS"
/*
   Destroys a MUMPS matrix: terminates the MUMPS instance (JOB_END), frees
   the triples and communication objects, then chains to the type-specific
   destroy (which converts the Mat back to its base type) and finally to
   the base destroy.
*/
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size=lu->size;
  PetscErrorCode (*specialdestroy)(Mat);
  PetscFunctionBegin;
  if (lu->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    if (size > 1){
      /* parallel case: distributed-solution buffers and scatters exist */
      ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr);
      ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
      ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
      ierr = PetscFree(lu->val);CHKERRQ(ierr);
    }
    lu->id.job=JOB_END;  /* tell MUMPS to release its internal data */
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    ierr = PetscFree(lu->irn);CHKERRQ(ierr);
    ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
    ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
  }
  /* save the pointer first: specialdestroy converts A back to the base type,
     which replaces A->ops->destroy with the original destroy called next */
  specialdestroy = lu->specialdestroy;
  ierr = (*specialdestroy)(A);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "MatDestroy_AIJMUMPS"
232 PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
233 {
234   PetscErrorCode ierr;
235   PetscMPIInt    size;
236 
237   PetscFunctionBegin;
238   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
239   if (size==1) {
240     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
241   } else {
242     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
243   }
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
249 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
250 {
251   PetscErrorCode ierr;
252   PetscMPIInt    size;
253 
254   PetscFunctionBegin;
255   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
256   if (size==1) {
257     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
258   } else {
259     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
260   }
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatSolve_MUMPS"
/*
   Solves A x = b with the MUMPS factors (MUMPS job 3).
   MUMPS requires a centralized rhs: in parallel, b is scattered to a
   sequential vector on rank 0; the distributed solution (ICNTL(21)=1) is
   scattered back into x through lu->scat_sol, which is created lazily on
   the first solve after a factorization.
*/
PetscErrorCode MatSolve_MUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar    *array;
  Vec            x_seq;
  IS             is_iden,is_petsc;
  VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;  /* NOTE(review): scat_sol local is unused below */
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  lu->id.nrhs = 1;
  x_seq = lu->b_seq;  /* despite the name, this is the sequential rhs vector */
  if (lu->size > 1){
    /* MUMPS only supports centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1 */
    /* sequential: MUMPS overwrites the rhs with the solution, so solve in x */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }
  /* restore the array access; the raw pointer stays valid for the MUMPS call */
  if (lu->size == 1){
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  } else if (!lu->myid){
    ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
  }

  if (lu->size > 1){
    /* distributed solution */
    lu->id.ICNTL(21) = 1;
    if (!lu->nSolve){
      /* Create x_seq=sol_loc for repeated use */
      PetscInt    lsol_loc;
      PetscScalar *sol_loc;
      lsol_loc = lu->id.INFO(23); /* length of sol_loc */
      /* single allocation holds both the values and the index array behind them */
      ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr);
      lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
      lu->id.lsol_loc = lsol_loc;
#if defined(PETSC_USE_COMPLEX)
      lu->id.sol_loc  = (ZMUMPS_DOUBLE *)sol_loc;
#else
      lu->id.sol_loc  = (DMUMPS_DOUBLE *)sol_loc;
#endif
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr);
    }
  }

  /* solve phase */
  /*-------------*/
  lu->id.job = 3;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
  }

  if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (!lu->nSolve){ /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<lu->id.lsol_loc; i++){
        lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(is_petsc);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
    ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
  }
  lu->nSolve++;
  PetscFunctionReturn(0);
}
349 
350 /*
351   input:
352    F:        numeric factor
353   output:
354    nneg:     total number of negative pivots
355    nzero:    0
356    npos:     (global dimension of F) - nneg
357 */
358 
359 #undef __FUNCT__
360 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
361 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
362 {
363   Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
364   PetscErrorCode ierr;
365   PetscMPIInt    size;
366 
367   PetscFunctionBegin;
368   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
369   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
370   if (size > 1 && lu->id.ICNTL(13) != 1){
371     SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13));
372   }
373   if (nneg){
374     if (!lu->myid){
375       *nneg = lu->id.INFOG(12);
376     }
377     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
378   }
379   if (nzero) *nzero = 0;
380   if (npos)  *npos  = F->rmap.N - (*nneg);
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "MatFactorNumeric_MUMPS"
386 PetscErrorCode MatFactorNumeric_MUMPS(Mat A,MatFactorInfo *info,Mat *F)
387 {
388   Mat_MUMPS      *lu =(Mat_MUMPS*)(*F)->spptr;
389   Mat_MUMPS      *lua=(Mat_MUMPS*)(A)->spptr;
390   PetscErrorCode ierr;
391   PetscInt       rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl;
392   PetscTruth     valOnly,flg;
393   Mat            F_diag;
394 
395   PetscFunctionBegin;
396   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
397     (*F)->ops->solve    = MatSolve_MUMPS;
398 
399     /* Initialize a MUMPS instance */
400     ierr = MPI_Comm_rank(A->comm, &lu->myid);
401     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
402     lua->myid = lu->myid; lua->size = lu->size;
403     lu->id.job = JOB_INIT;
404     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
405     ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr);
406 
407     /* Set mumps options */
408     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
409     lu->id.par=1;  /* host participates factorizaton and solve */
410     lu->id.sym=lu->sym;
411     if (lu->sym == 2){
412       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
413       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
414     }
415 #if defined(PETSC_USE_COMPLEX)
416     zmumps_c(&lu->id);
417 #else
418     dmumps_c(&lu->id);
419 #endif
420 
421     if (lu->size == 1){
422       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
423     } else {
424       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
425     }
426 
427     icntl=-1;
428     lu->id.ICNTL(4) = 0;  /* level of printing; overwrite mumps default ICNTL(4)=2 */
429     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
430     if ((flg && icntl > 0) || PetscLogPrintInfo) {
431       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
432     } else { /* no output */
433       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
434       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
435       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
436     }
437     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
438     icntl=-1;
439     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
440     if (flg) {
441       if (icntl== 1){
442         SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
443       } else {
444         lu->id.ICNTL(7) = icntl;
445       }
446     }
447     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
448     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
449     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
450     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
451     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
452     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
453     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
454 
455     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
456     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
457     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
458     ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);CHKERRQ(ierr);
459     PetscOptionsEnd();
460   }
461 
462   /* define matrix A */
463   switch (lu->id.ICNTL(18)){
464   case 0:  /* centralized assembled matrix input (size=1) */
465     if (!lu->myid) {
466       if (lua->isAIJ){
467         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
468         nz               = aa->nz;
469         ai = aa->i; aj = aa->j; lu->val = aa->a;
470       } else {
471         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
472         nz                  =  aa->nz;
473         ai = aa->i; aj = aa->j; lu->val = aa->a;
474       }
475       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
476         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
477         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
478         nz = 0;
479         for (i=0; i<M; i++){
480           rnz = ai[i+1] - ai[i];
481           while (rnz--) {  /* Fortran row/col index! */
482             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
483           }
484         }
485       }
486     }
487     break;
488   case 3:  /* distributed assembled matrix input (size>1) */
489     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
490       valOnly = PETSC_FALSE;
491     } else {
492       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
493     }
494     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
495     break;
496   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
497   }
498 
499   /* analysis phase */
500   /*----------------*/
501   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
502     lu->id.job = 1;
503 
504     lu->id.n = M;
505     switch (lu->id.ICNTL(18)){
506     case 0:  /* centralized assembled matrix input */
507       if (!lu->myid) {
508         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
509         if (lu->id.ICNTL(6)>1){
510 #if defined(PETSC_USE_COMPLEX)
511           lu->id.a = (mumps_double_complex*)lu->val;
512 #else
513           lu->id.a = lu->val;
514 #endif
515         }
516       }
517       break;
518     case 3:  /* distributed assembled matrix input (size>1) */
519       lu->id.nz_loc = nnz;
520       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
521       if (lu->id.ICNTL(6)>1) {
522 #if defined(PETSC_USE_COMPLEX)
523         lu->id.a_loc = (mumps_double_complex*)lu->val;
524 #else
525         lu->id.a_loc = lu->val;
526 #endif
527       }
528       /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
529       IS  is_iden;
530       Vec b;
531       if (!lu->myid){
532         ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr);
533         ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr);
534       } else {
535         ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr);
536         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
537       }
538       ierr = VecCreate(A->comm,&b);CHKERRQ(ierr);
539       ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr);
540       ierr = VecSetFromOptions(b);CHKERRQ(ierr);
541 
542       ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr);
543       ierr = ISDestroy(is_iden);CHKERRQ(ierr);
544       ierr = VecDestroy(b);CHKERRQ(ierr);
545       break;
546     }
547 #if defined(PETSC_USE_COMPLEX)
548     zmumps_c(&lu->id);
549 #else
550     dmumps_c(&lu->id);
551 #endif
552     if (lu->id.INFOG(1) < 0) {
553       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
554     }
555   }
556 
557   /* numerical factorization phase */
558   /*-------------------------------*/
559   lu->id.job = 2;
560   if(!lu->id.ICNTL(18)) {
561     if (!lu->myid) {
562 #if defined(PETSC_USE_COMPLEX)
563       lu->id.a = (mumps_double_complex*)lu->val;
564 #else
565       lu->id.a = lu->val;
566 #endif
567     }
568   } else {
569 #if defined(PETSC_USE_COMPLEX)
570     lu->id.a_loc = (mumps_double_complex*)lu->val;
571 #else
572     lu->id.a_loc = lu->val;
573 #endif
574   }
575 #if defined(PETSC_USE_COMPLEX)
576   zmumps_c(&lu->id);
577 #else
578   dmumps_c(&lu->id);
579 #endif
580   if (lu->id.INFOG(1) < 0) {
581     if (lu->id.INFO(1) == -13) {
582       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
583     } else {
584       SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
585     }
586   }
587 
588   if (!lu->myid && lu->id.ICNTL(16) > 0){
589     SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
590   }
591 
592   if (lu->size > 1){
593     if ((*F)->factor == FACTOR_LU){
594       F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
595     } else {
596       F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
597     }
598     F_diag->assembled = PETSC_TRUE;
599     if (lu->nSolve){
600       ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
601       ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
602       ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
603     }
604   }
605   (*F)->assembled   = PETSC_TRUE;
606   lu->matstruc      = SAME_NONZERO_PATTERN;
607   lu->CleanUpMUMPS  = PETSC_TRUE;
608   lu->nSolve        = 0;
609   PetscFunctionReturn(0);
610 }
611 
612 /* Note the Petsc r and c permutations are ignored */
613 #undef __FUNCT__
614 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
615 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
616   Mat            B;
617   Mat_MUMPS      *lu;
618   PetscErrorCode ierr;
619 
620   PetscFunctionBegin;
621   /* Create the factorization matrix */
622   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
623   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
624   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
625   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
626   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
627 
628   B->ops->lufactornumeric = MatFactorNumeric_MUMPS;
629   B->factor               = FACTOR_LU;
630   lu                      = (Mat_MUMPS*)B->spptr;
631   lu->sym                 = 0;
632   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
633 
634   *F = B;
635   PetscFunctionReturn(0);
636 }
637 
638 /* Note the Petsc r permutation is ignored */
639 #undef __FUNCT__
640 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
641 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
642   Mat            B;
643   Mat_MUMPS      *lu;
644   PetscErrorCode ierr;
645 
646   PetscFunctionBegin;
647   /* Create the factorization matrix */
648   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
649   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
650   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
651   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
652   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
653 
654   B->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS;
655   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
656   B->factor                     = FACTOR_CHOLESKY;
657   lu                            = (Mat_MUMPS*)B->spptr;
658   lu->sym                       = 2;
659   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
660 
661   *F = B;
662   PetscFunctionReturn(0);
663 }
664 
665 #undef __FUNCT__
666 #define __FUNCT__ "MatFactorInfo_MUMPS"
667 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
668   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
669   PetscErrorCode ierr;
670 
671   PetscFunctionBegin;
672   /* check if matrix is mumps type */
673   if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(0);
674 
675   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
676   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
677   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
678   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):        %d \n",lu->id.ICNTL(1));CHKERRQ(ierr);
679   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr);
680   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):  %d \n",lu->id.ICNTL(3));CHKERRQ(ierr);
681   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
682   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
683   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
684   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
685   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scalling strategy):       %d \n",lu->id.ICNTL(8));CHKERRQ(ierr);
686   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
687   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
688   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
689   if (!lu->myid && lu->id.ICNTL(11)>0) {
690     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
691     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
692     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
693     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
694     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
695     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
696 
697   }
698   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
699   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
700   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
701   /* ICNTL(15-17) not used */
702   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
703   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Shur complement info):                       %d \n",lu->id.ICNTL(19));CHKERRQ(ierr);
704   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern):                         %d \n",lu->id.ICNTL(20));CHKERRQ(ierr);
705   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",lu->id.ICNTL(21));CHKERRQ(ierr);
706 
707   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
708   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
709   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
710   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",lu->id.CNTL(4));CHKERRQ(ierr);
711 
712   /* infomation local to each processor */
713   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);}
714   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
715   ierr = PetscSynchronizedFlush(A->comm);
716   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);}
717   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
718   ierr = PetscSynchronizedFlush(A->comm);
719   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);}
720   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
721   ierr = PetscSynchronizedFlush(A->comm);
722   /*
723   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);}
724   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr);
725   ierr = PetscSynchronizedFlush(A->comm);
726   */
727 
728   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);}
729   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr);
730   ierr = PetscSynchronizedFlush(A->comm);
731 
732   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);}
733   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr);
734   ierr = PetscSynchronizedFlush(A->comm);
735 
736   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);}
737   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr);
738   ierr = PetscSynchronizedFlush(A->comm);
739 
740   if (!lu->myid){ /* information from the host */
741     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
742     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
743     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
744 
745     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
746     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
747     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
748     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
749     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
750     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
751     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
752     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
753     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
754     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
755     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
756     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
757     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
758     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
759     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
760     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
761     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
762      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
763      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr);
764      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr);
765      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr);
766      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr);
767      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr);
768   }
769 
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatView_MUMPS"
775 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
776   PetscErrorCode    ierr;
777   PetscTruth        iascii;
778   PetscViewerFormat format;
779   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
780 
781   PetscFunctionBegin;
782   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
783 
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
785   if (iascii) {
786     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
787     if (format == PETSC_VIEWER_ASCII_INFO){
788       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
789     }
790   }
791   PetscFunctionReturn(0);
792 }
793 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
/*
   Assembly hook for AIJMUMPS matrices: runs the saved assembly routine of the
   underlying AIJ type, then (re)installs the MUMPS LU symbolic factorization
   entry point on the matrix.
*/
PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
  PetscErrorCode ierr;
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  /* run the base (SEQAIJ/MPIAIJ) assembly first */
  ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);

  /* Save whatever factorization routines the base assembly installed, then
     override LU with the MUMPS implementation.
     NOTE(review): if the base MatAssemblyEnd does not reset
     A->ops->lufactorsymbolic to the native routine, a second assembly would
     save the MUMPS routine over the original saved pointer -- confirm. */
  mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
  mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
  A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
  PetscFunctionReturn(0);
}
808 
809 EXTERN_C_BEGIN
810 #undef __FUNCT__
811 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
812 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
813 {
814   PetscErrorCode ierr;
815   PetscMPIInt    size;
816   MPI_Comm       comm;
817   Mat            B=*newmat;
818   Mat_MUMPS      *mumps;
819 
820   PetscFunctionBegin;
821   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
822   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
823 
824   if (reuse == MAT_INITIAL_MATRIX) {
825     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
826     /* A may have special container that is not duplicated,
827        e.g., A is obtainted from MatMatMult(,&A). Save B->ops instead */
828     mumps->MatDuplicate              = B->ops->duplicate;
829     mumps->MatView                   = B->ops->view;
830     mumps->MatAssemblyEnd            = B->ops->assemblyend;
831     mumps->MatLUFactorSymbolic       = B->ops->lufactorsymbolic;
832     mumps->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic;
833     mumps->MatDestroy                = B->ops->destroy;
834   } else {
835     mumps->MatDuplicate              = A->ops->duplicate;
836     mumps->MatView                   = A->ops->view;
837     mumps->MatAssemblyEnd            = A->ops->assemblyend;
838     mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
839     mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
840     mumps->MatDestroy                = A->ops->destroy;
841   }
842   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
843   mumps->CleanUpMUMPS              = PETSC_FALSE;
844   mumps->isAIJ                     = PETSC_TRUE;
845 
846   B->spptr                         = (void*)mumps;
847   B->ops->duplicate                = MatDuplicate_MUMPS;
848   B->ops->view                     = MatView_MUMPS;
849   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
850   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
851   B->ops->destroy                  = MatDestroy_MUMPS;
852 
853   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
854   if (size == 1) {
855     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
856                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
857     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
858                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
859   } else {
860     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
861                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
862     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
863                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
864   }
865 
866   ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr);
867   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
868   *newmat = B;
869   PetscFunctionReturn(0);
870 }
871 EXTERN_C_END
872 
873 /*MC
874   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
875   and sequential matrices via the external package MUMPS.
876 
877   If MUMPS is installed (see the manual for instructions
878   on how to declare the existence of external packages),
879   a matrix type can be constructed which invokes MUMPS solvers.
880   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
881 
882   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
883   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
884   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
885   for communicators controlling multiple processes.  It is recommended that you call both of
886   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
887   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
888   without data copy.
889 
890   Options Database Keys:
891 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
892 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
893 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
894 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
895 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
896 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
897 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
898 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
899 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
900 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
901 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
902 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
903 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
904 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
905 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
906 
907   Level: beginner
908 
909 .seealso: MATSBAIJMUMPS
910 M*/
911 
912 EXTERN_C_BEGIN
913 #undef __FUNCT__
914 #define __FUNCT__ "MatCreate_AIJMUMPS"
915 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A)
916 {
917   PetscErrorCode ierr;
918   PetscMPIInt    size;
919 
920   PetscFunctionBegin;
921   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
922   if (size == 1) {
923     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
924   } else {
925     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
926     /*
927     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
928     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
929     */
930   }
931   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
932   PetscFunctionReturn(0);
933 }
934 EXTERN_C_END
935 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
/*
   Assembly hook for SBAIJMUMPS matrices: runs the saved assembly routine of
   the underlying SBAIJ type, then (re)installs the MUMPS Cholesky symbolic
   factorization entry point.
*/
PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  /* run the base (SEQSBAIJ/MPISBAIJ) assembly first */
  ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
  /* Save the routine the base assembly installed, then override with MUMPS.
     NOTE(review): if the base MatAssemblyEnd does not reset
     A->ops->choleskyfactorsymbolic to the native routine, a second assembly
     would save the MUMPS routine over the original saved pointer -- confirm. */
  mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
  A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
  PetscFunctionReturn(0);
}
949 
950 EXTERN_C_BEGIN
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
953 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
954 {
955   Mat       A;
956   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
957   PetscErrorCode ierr;
958 
959   PetscFunctionBegin;
960   /*
961     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
962     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
963     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
964     block size info so that PETSc can determine the local size properly.  The block size info is set
965     in the preallocation routine.
966   */
967   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
968   A    = ((Mat_MPISBAIJ *)B->data)->A;
969   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 EXTERN_C_END
973 
974 EXTERN_C_BEGIN
975 #undef __FUNCT__
976 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
977 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
978 {
979   PetscErrorCode ierr;
980   PetscMPIInt    size;
981   MPI_Comm       comm;
982   Mat            B=*newmat;
983   Mat_MUMPS      *mumps;
984   void           (*f)(void);
985 
986   PetscFunctionBegin;
987   if (reuse == MAT_INITIAL_MATRIX) {
988     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
989   }
990 
991   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
992   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
993 
994   mumps->MatDuplicate              = A->ops->duplicate;
995   mumps->MatView                   = A->ops->view;
996   mumps->MatAssemblyEnd            = A->ops->assemblyend;
997   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
998   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
999   mumps->MatDestroy                = A->ops->destroy;
1000   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
1001   mumps->CleanUpMUMPS              = PETSC_FALSE;
1002   mumps->isAIJ                     = PETSC_FALSE;
1003 
1004   B->spptr                         = (void*)mumps;
1005   B->ops->duplicate                = MatDuplicate_MUMPS;
1006   B->ops->view                     = MatView_MUMPS;
1007   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
1008   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
1009   B->ops->destroy                  = MatDestroy_MUMPS;
1010 
1011   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
1012   if (size == 1) {
1013     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
1014                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1015     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
1016                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1017   } else {
1018   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
1019     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
1020     if (f) { /* This case should always be true when this routine is called */
1021       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
1022       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1023                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
1024                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
1025     }
1026     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
1027                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1028     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
1029                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1030   }
1031 
1032   ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr);
1033   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
1034   *newmat = B;
1035   PetscFunctionReturn(0);
1036 }
1037 EXTERN_C_END
1038 
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatDuplicate_MUMPS"
1041 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
1042   PetscErrorCode ierr;
1043   Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;
1044 
1045   PetscFunctionBegin;
1046   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
1047   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 /*MC
1052   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
1053   distributed and sequential matrices via the external package MUMPS.
1054 
1055   If MUMPS is installed (see the manual for instructions
1056   on how to declare the existence of external packages),
1057   a matrix type can be constructed which invokes MUMPS solvers.
1058   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
1059 
1060   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
1061   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
1062   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
1063   for communicators controlling multiple processes.  It is recommended that you call both of
1064   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
1065   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
1066   without data copy.
1067 
1068   Options Database Keys:
1069 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
1070 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
1071 . -mat_mumps_icntl_4 <0,...,4> - print level
1072 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
1073 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
1074 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
1075 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
1076 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
1077 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
1078 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
1079 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
1080 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
1081 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
1082 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
1083 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
1084 
1085   Level: beginner
1086 
1087 .seealso: MATAIJMUMPS
1088 M*/
1089 
1090 EXTERN_C_BEGIN
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
1093 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A)
1094 {
1095   PetscErrorCode ierr;
1096   PetscMPIInt    size;
1097 
1098   PetscFunctionBegin;
1099   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
1100   if (size == 1) {
1101     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1102   } else {
1103     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1104   }
1105   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 EXTERN_C_END
1109