xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 593c1905dad567ae273cd337623ecaef31b3e817)
#define PETSCMAT_DLL

/*
    Provides an interface to the MUMPS sparse solver
*/
#include "src/mat/impls/aij/seq/aij.h"
#include "src/mat/impls/aij/mpi/mpiaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"
#include "src/mat/impls/sbaij/mpi/mpisbaij.h"

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#include "zmumps_c.h"
#else
#include "dmumps_c.h"
#endif
EXTERN_C_END
#define JOB_INIT -1
#define JOB_END -2
/* macros so that indices match the MUMPS documentation */
#define ICNTL(I) icntl[(I)-1]
#define CNTL(I) cntl[(I)-1]
#define INFOG(I) infog[(I)-1]
#define INFO(I) info[(I)-1]
#define RINFOG(I) rinfog[(I)-1]
#define RINFO(I) rinfo[(I)-1]
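/* e.g. id.ICNTL(7) expands to id.icntl[6], so ICNTL(7) here refers to the same
   control parameter as ICNTL(7) in the MUMPS User's Guide */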

typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;
#else
  DMUMPS_STRUC_C id;
#endif
  MatStructure   matstruc;
  PetscMPIInt    myid,size;
  PetscInt       *irn,*jcn,sym,nSolve;
  PetscScalar    *val;
  MPI_Comm       comm_mumps;
  VecScatter     scat_rhs, scat_sol;
  PetscTruth     isAIJ,CleanUpMUMPS;
  Vec            b_seq,x_seq;
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
} Mat_MUMPS;

EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
EXTERN_C_BEGIN
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
EXTERN_C_END
/* convert a PETSc mpiaij/mpisbaij matrix to triples: row[nz], col[nz], val[nz] */
/*
  input:
    A       - matrix in mpiaij or mpisbaij (bs=1) format
    shift   - 0: C style output triples; 1: Fortran style output triples.
    valOnly - FALSE: space is allocated and row/col indices as well as values are set for the triples
              TRUE:  only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices and matrix values (the matrix triples)
 */
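/*
  Illustration (not from the MUMPS documentation): if one process owns the 2x2 block
       [ 4  1 ]
       [ 0  3 ]
  with rstart=0 and an empty off-diagonal (B) part, then shift=1 yields nnz=3 and
       r = {1,1,2},  c = {1,2,2},  v = {4,1,3}
*/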
PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
  PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
  PetscErrorCode ierr;
  PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
  PetscInt       *row,*col;
  PetscScalar    *av, *bv,*val;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  if (mumps->isAIJ){
    Mat_MPIAIJ    *mat = (Mat_MPIAIJ*)A->data;
    Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
    Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;
    av=aa->a; bv=bb->a;
  } else {
    Mat_MPISBAIJ  *mat = (Mat_MPISBAIJ*)A->data;
    Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
    Mat_SeqBAIJ   *bb=(Mat_SeqBAIJ*)(mat->B)->data;
    if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
    nz = aa->nz + bb->nz;
    ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
    garray = mat->garray;
    av=aa->a; bv=bb->a;
  }

  if (!valOnly){
    ierr = PetscMalloc(nz*sizeof(PetscInt),&row);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
    *r = row; *c = col; *v = val;
  } else {
    row = *r; col = *c; val = *v;
  }
  *nnz = nz;

  jj = 0; irow = rstart;
  for ( i=0; i<m; i++ ) {
    ajj = aj + ai[i];                 /* ptr to the beginning of this row */
    countA = ai[i+1] - ai[i];
    countB = bi[i+1] - bi[i];
    bjj = bj + bi[i];

    /* get jB, the starting local col index for the 2nd B-part */
    colA_start = rstart + ajj[0]; /* the smallest col index for A */
    j=-1;
    do {
      j++;
      if (j == countB) break;
      jcol = garray[bjj[j]];
    } while (jcol < colA_start);
    jB = j;

    /* B-part, smaller col index */
    for (j=0; j<jB; j++){
      jcol = garray[bjj[j]];
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = jcol + shift;
      }
      val[jj++] = *bv++;
    }
    /* A-part */
    for (j=0; j<countA; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
      }
      val[jj++] = *av++;
    }
    /* B-part, larger col index */
    for (j=jB; j<countB; j++){
      if (!valOnly){
        row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
      }
      val[jj++] = *bv++;
    }
    irow++;
  }

  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MUMPS_Base"
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B=*newmat;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
  void           (*f)(void);

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  B->ops->duplicate              = mumps->MatDuplicate;
  B->ops->view                   = mumps->MatView;
  B->ops->assemblyend            = mumps->MatAssemblyEnd;
  B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
  B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
  B->ops->destroy                = mumps->MatDestroy;

  /* put back the original composed preallocation function */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
  if (f) {
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr);
  }

  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MUMPS"
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size=lu->size;
  PetscErrorCode (*specialdestroy)(Mat);

  PetscFunctionBegin;
  if (lu->CleanUpMUMPS) {
    /* terminate the MUMPS instance and deallocate its memory */
    if (size > 1){
      ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr);
      ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
      ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
      ierr = PetscFree(lu->val);CHKERRQ(ierr);
    }
    lu->id.job=JOB_END;
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    ierr = PetscFree(lu->irn);CHKERRQ(ierr);
    ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
    ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
  }
  specialdestroy = lu->specialdestroy;
  ierr = (*specialdestroy)(A);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_AIJMUMPS"
PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SBAIJMUMPS"
PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size==1) {
    ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFactorInfo_MUMPS"
PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):        %d \n",lu->id.ICNTL(1));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):  %d \n",lu->id.ICNTL(3));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scaling strategy):        %d \n",lu->id.ICNTL(8));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
  if (!lu->myid && lu->id.ICNTL(11)>0) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
    ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
  }
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
  /* ICNTL(15-17) not used */
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Schur complement info):                      %d \n",lu->id.ICNTL(19));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern):                         %d \n",lu->id.ICNTL(20));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",lu->id.ICNTL(21));CHKERRQ(ierr);

  ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",lu->id.CNTL(4));CHKERRQ(ierr);

  /* information local to each processor */
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);
  /*
  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);
  */

  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);

  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);

  if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);}
  ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr);
  ierr = PetscSynchronizedFlush(A->comm);CHKERRQ(ierr);

  if (!lu->myid){ /* information from the host */
    ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);

    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively used after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space to store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compressions after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size (in MB) of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size (in MB) of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MUMPS"
PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
  PetscErrorCode    ierr;
  PetscTruth        iascii;
  PetscViewerFormat format;
  Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);

  PetscFunctionBegin;
  ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);

  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO){
      ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_AIJMUMPS"
PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar    *array;
  Vec            x_seq;
  IS             is_iden,is_petsc;
  VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  lu->id.nrhs = 1;
  x_seq = lu->b_seq;
  if (lu->size > 1){
    /* MUMPS only supports a centralized rhs; scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1 */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }
  if (lu->size == 1){
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  } else if (!lu->myid){
    ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
  }

  if (lu->size > 1){
    /* distributed solution */
    lu->id.ICNTL(21) = 1;
    if (!lu->nSolve){
      /* Create x_seq=sol_loc for repeated use */
      PetscInt    lsol_loc;
      PetscScalar *sol_loc;
      lsol_loc = lu->id.INFO(23); /* length of sol_loc */
      ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr);
      lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
      lu->id.lsol_loc = lsol_loc;
#if defined(PETSC_USE_COMPLEX)
      lu->id.sol_loc  = (mumps_double_complex*)sol_loc;
#else
      lu->id.sol_loc  = sol_loc;
#endif
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr);
    }
  }

  /* solve phase */
  /*-------------*/
  lu->id.job = 3;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
  }

  if (lu->size > 1) { /* convert the MUMPS distributed solution to the PETSc mpi vector x */
    if (!lu->nSolve){ /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<lu->id.lsol_loc; i++){
        lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(is_petsc);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
    ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
  }
  lu->nSolve++;
  PetscFunctionReturn(0);
}

/*
  input:
   F:        numeric factor
  output:
   nneg:     total number of negative pivots
   nzero:    0
   npos:     (global dimension of F) - nneg
*/
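/* A typical use (outside this file) is eigenvalue counting by spectral slicing:
   factor A - sigma*I with Cholesky, then the number of negative pivots returned
   in nneg equals the number of eigenvalues below sigma.  This is a general
   property of symmetric factorizations (Sylvester's law of inertia), not
   something specific to this interface. */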

#undef __FUNCT__
#define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
{
  Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size;
  int            neg=0;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  if (size > 1 && lu->id.ICNTL(13) != 1){
    SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.ICNTL(13));
  }
  if (!lu->myid){
    neg = lu->id.INFOG(12); /* number of negative pivots, available on the host */
  }
  ierr = MPI_Bcast(&neg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
  if (nneg)  *nneg  = neg;
  if (nzero) *nzero = 0;
  if (npos)  *npos  = F->rmap.N - neg;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatFactorNumeric_AIJMUMPS"
PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat_MUMPS      *lu =(Mat_MUMPS*)(*F)->spptr;
  Mat_MUMPS      *lua=(Mat_MUMPS*)(A)->spptr;
  PetscErrorCode ierr;
  PetscInt       rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl;
  PetscTruth     valOnly,flg;
  Mat            F_diag;

  PetscFunctionBegin;
  if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
    (*F)->ops->solve = MatSolve_AIJMUMPS;

    /* Initialize a MUMPS instance */
    ierr = MPI_Comm_rank(A->comm,&lu->myid);CHKERRQ(ierr);
    ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
    lua->myid = lu->myid; lua->size = lu->size;
    lu->id.job = JOB_INIT;
    ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
    ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr);

    /* Set MUMPS options */
    ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
    lu->id.par=1;  /* the host participates in factorization and solve */
    lu->id.sym=lu->sym;
    if (lu->sym == 2){
      ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
      if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
    }
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif

    if (lu->size == 1){
      lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
    } else {
      lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
    }

    icntl=-1;
    ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
    if ((flg && icntl > 0) || PetscLogPrintInfo) {
      lu->id.ICNTL(4)=icntl; /* and use the MUMPS default icntl(i), i=1,2,3 */
    } else { /* no output */
      lu->id.ICNTL(1) = 0;  /* error messages, default=6 */
      lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warnings, default=0 */
      lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
      lu->id.ICNTL(4) = 0;  /* level of printing, 0,1,2,3,4, default=2 */
    }
    ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
    icntl=-1;
    ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
    if (flg) {
      if (icntl == 1){
        SETERRQ(PETSC_ERR_SUP,"pivot order set by the user in PERM_IN is not supported by the PETSc/MUMPS interface\n");
      } else {
        lu->id.ICNTL(7) = icntl;
      }
    }
    ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);

    ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
    PetscOptionsEnd();
  }

  /* define matrix A */
  switch (lu->id.ICNTL(18)){
  case 0:  /* centralized assembled matrix input (size=1) */
    if (!lu->myid) {
      if (lua->isAIJ){
        Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
        nz               = aa->nz;
        ai = aa->i; aj = aa->j; lu->val = aa->a;
      } else {
        Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
        nz               = aa->nz;
        ai = aa->i; aj = aa->j; lu->val = aa->a;
      }
      if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
        ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
        ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
        nz = 0;
        for (i=0; i<M; i++){
          rnz = ai[i+1] - ai[i];
          while (rnz--) {  /* Fortran row/col index! */
            lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
          }
        }
      }
    }
    break;
  case 3:  /* distributed assembled matrix input (size>1) */
    if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
      valOnly = PETSC_FALSE;
    } else {
      valOnly = PETSC_TRUE; /* only update mat values, not the row and col indices */
    }
    ierr = MatConvertToTriples(A,1,valOnly,&nnz,&lu->irn,&lu->jcn,&lu->val);CHKERRQ(ierr);
    break;
  default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
  }

  /* analysis phase */
  /*----------------*/
  if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
    lu->id.job = 1;

    lu->id.n = M;
    switch (lu->id.ICNTL(18)){
    case 0:  /* centralized assembled matrix input */
      if (!lu->myid) {
        lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
        if (lu->id.ICNTL(6)>1){
#if defined(PETSC_USE_COMPLEX)
          lu->id.a = (mumps_double_complex*)lu->val;
#else
          lu->id.a = lu->val;
#endif
        }
      }
      break;
    case 3:  /* distributed assembled matrix input (size>1) */
      {
      IS  is_iden;
      Vec b;
      lu->id.nz_loc = nnz;
      lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
      if (lu->id.ICNTL(6)>1) {
#if defined(PETSC_USE_COMPLEX)
        lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
        lu->id.a_loc = lu->val;
#endif
      }
      /* MUMPS only supports a centralized rhs. Create the scatter scat_rhs for repeated use in MatSolve() */
      if (!lu->myid){
        ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr);
      } else {
        ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr);
        ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
      }
      ierr = VecCreate(A->comm,&b);CHKERRQ(ierr);
      ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr);
      ierr = VecSetFromOptions(b);CHKERRQ(ierr);

      ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr);
      ierr = ISDestroy(is_iden);CHKERRQ(ierr);
      ierr = VecDestroy(b);CHKERRQ(ierr);
      }
      break;
    }
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    if (lu->id.INFOG(1) < 0) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
    }
  }

  /* numerical factorization phase */
  /*-------------------------------*/
  lu->id.job = 2;
  if (!lu->id.ICNTL(18)) {
    if (!lu->myid) {
#if defined(PETSC_USE_COMPLEX)
      lu->id.a = (mumps_double_complex*)lu->val;
#else
      lu->id.a = lu->val;
#endif
    }
  } else {
#if defined(PETSC_USE_COMPLEX)
    lu->id.a_loc = (mumps_double_complex*)lu->val;
#else
    lu->id.a_loc = lu->val;
#endif
  }
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    if (lu->id.INFO(1) == -13) {
      SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
    } else {
      SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
    }
  }

  if (!lu->myid && lu->id.ICNTL(16) > 0){
    SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.ICNTL(16));
  }

  if (lu->size > 1){
    if ((*F)->factor == FACTOR_LU){
      F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
    } else {
      F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
    }
    F_diag->assembled = PETSC_TRUE;
    if (lu->nSolve){
      ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
      ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
      ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
    }
  }
  (*F)->assembled  = PETSC_TRUE;
  lu->matstruc     = SAME_NONZERO_PATTERN;
  lu->CleanUpMUMPS = PETSC_TRUE;
  lu->nSolve       = 0;
  PetscFunctionReturn(0);
}

/* Note the PETSc r and c permutations are ignored */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
  Mat            B;
  Mat_MUMPS      *lu;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
  B->factor               = FACTOR_LU;
  lu                      = (Mat_MUMPS*)B->spptr;
  lu->sym                 = 0;
  lu->matstruc            = DIFFERENT_NONZERO_PATTERN;

  *F = B;
  PetscFunctionReturn(0);
}

/* Note the PETSc r permutation is ignored */
#undef __FUNCT__
#define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
  Mat            B;
  Mat_MUMPS      *lu;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
  ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
  B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
  B->factor                     = FACTOR_CHOLESKY;
  lu                            = (Mat_MUMPS*)B->spptr;
  lu->sym                       = 2;
  lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;

  *F = B;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);

  mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
  mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
  A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;
  MPI_Comm       comm;
  Mat            B=*newmat;
  Mat_MUMPS      *mumps;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);

  mumps->MatDuplicate              = A->ops->duplicate;
  mumps->MatView                   = A->ops->view;
  mumps->MatAssemblyEnd            = A->ops->assemblyend;
  mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
  mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
  mumps->MatDestroy                = A->ops->destroy;
  mumps->specialdestroy            = MatDestroy_AIJMUMPS;
  mumps->CleanUpMUMPS              = PETSC_FALSE;
  mumps->isAIJ                     = PETSC_TRUE;

  B->spptr                         = (void*)mumps;
  B->ops->duplicate                = MatDuplicate_MUMPS;
  B->ops->view                     = MatView_MUMPS;
  B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
  B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
  B->ops->destroy                  = MatDestroy_MUMPS;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
                                             "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
                                             "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
                                             "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
                                             "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
  }

  ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

/*MC
  MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
  and sequential matrices via the external package MUMPS.

  If MUMPS is installed (see the manual for instructions
  on how to declare the existence of external packages),
  a matrix type can be constructed which invokes MUMPS solvers.
  After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).

  If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
  Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
  MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
  conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
  without data copy.

  Options Database Keys:
+ -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
. -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,...,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix ordering options (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - percentage increase in the estimated working space (see MUMPS User's Guide)
. -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_cntl_1 <delta> - relative pivoting threshold
. -mat_mumps_cntl_2 <tol> - stopping criterion for iterative refinement
- -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold

  Level: beginner

.seealso: MATSBAIJMUMPS
M*/
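
/*
   A minimal usage sketch (error checking omitted; n and nz below are
   placeholders for the global size and per-row preallocation, not names
   defined in this file):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
     MatSetType(A,MATAIJMUMPS);
     MatSeqAIJSetPreallocation(A,nz,PETSC_NULL);
     MatMPIAIJSetPreallocation(A,nz,PETSC_NULL,nz,PETSC_NULL);
     ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...

   then solve with a KSP configured as a direct solver, e.g.
   -ksp_type preonly -pc_type lu.
*/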

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_AIJMUMPS"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    /*
    Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
    ierr = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
    */
  }
  ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
{
  PetscErrorCode ierr;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
  mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
  A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
{
  Mat            A;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)B->spptr;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /*
    After performing the MPISBAIJ preallocation, we need to convert the local diagonal block matrix
    into MUMPS type so that the block Jacobi preconditioner (for example) can use MUMPS.  I would
    like this to be done in the MatCreate routine, but the creation of this inner matrix requires
    block size info so that PETSc can determine the local size properly.  The block size info is set
    in the preallocation routine.
  */
  ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
  A    = ((Mat_MPISBAIJ *)B->data)->A;
  ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;
  MPI_Comm       comm;
  Mat            B=*newmat;
  Mat_MUMPS      *mumps;
  void           (*f)(void);

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);

  mumps->MatDuplicate              = A->ops->duplicate;
  mumps->MatView                   = A->ops->view;
  mumps->MatAssemblyEnd            = A->ops->assemblyend;
  mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
  mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
  mumps->MatDestroy                = A->ops->destroy;
  mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
  mumps->CleanUpMUMPS              = PETSC_FALSE;
  mumps->isAIJ                     = PETSC_FALSE;

  B->spptr                         = (void*)mumps;
  B->ops->duplicate                = MatDuplicate_MUMPS;
  B->ops->view                     = MatView_MUMPS;
  B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
  B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
  B->ops->destroy                  = MatDestroy_MUMPS;

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
                                             "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
                                             "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
  } else {
    /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
    ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
    if (f) { /* This case should always be true when this routine is called */
      mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
      ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                               "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
                                               MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
    }
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
                                             "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
                                             "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
  }

  ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MUMPS"
PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
  PetscErrorCode ierr;
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;

  PetscFunctionBegin;
  ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
  ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*MC
  MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
  distributed and sequential matrices via the external package MUMPS.

  If MUMPS is installed (see the manual for instructions
  on how to declare the existence of external packages),
  a matrix type can be constructed which invokes MUMPS solvers.
  After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).

  If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
  Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
  MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
  for communicators controlling multiple processes.  It is recommended that you call both of
  the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
  conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
  without data copy.

  Options Database Keys:
+ -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
. -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,...,4> - print level
. -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
. -mat_mumps_icntl_7 <0,...,7> - matrix ordering options (see MUMPS User's Guide)
. -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
. -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
. -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
. -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_icntl_14 <n> - percentage increase in the estimated working space (see MUMPS User's Guide)
. -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
. -mat_mumps_cntl_1 <delta> - relative pivoting threshold
. -mat_mumps_cntl_2 <tol> - stopping criterion for iterative refinement
- -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold

  Level: beginner

.seealso: MATAIJMUMPS
M*/
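
/*
   A minimal usage sketch for the Cholesky case (error checking omitted;
   n and nz below are placeholders for the global size and per-row
   preallocation, not names defined in this file):

     Mat A;
     MatCreate(PETSC_COMM_WORLD,&A);
     MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,n,n);
     MatSetType(A,MATSBAIJMUMPS);
     MatSeqSBAIJSetPreallocation(A,1,nz,PETSC_NULL);
     MatMPISBAIJSetPreallocation(A,1,nz,PETSC_NULL,nz,PETSC_NULL);
     ... assemble the upper triangular part of A ...

   then solve with a KSP configured as a direct solver, e.g.
   -ksp_type preonly -pc_type cholesky.
*/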

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SBAIJMUMPS"
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
  } else {
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  }
  ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END