xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 58cd72c3d0f0c5a63e84afc03ac54bb1bb84cd8d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Provides an interface to the MUMPS sparse solver
5 */
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "zmumps_c.h"
14 #else
15 #include "dmumps_c.h"
16 #endif
17 EXTERN_C_END
18 #define JOB_INIT -1
19 #define JOB_END -2
20 /* macros s.t. indices match MUMPS documentation */
21 #define ICNTL(I) icntl[(I)-1]
22 #define CNTL(I) cntl[(I)-1]
23 #define INFOG(I) infog[(I)-1]
24 #define INFO(I) info[(I)-1]
25 #define RINFOG(I) rinfog[(I)-1]
26 #define RINFO(I) rinfo[(I)-1]
27 
/* Private solver context stored in A->spptr for matrices interfaced to MUMPS. */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;                 /* MUMPS solver instance (complex-scalar build) */
#else
  DMUMPS_STRUC_C id;                 /* MUMPS solver instance (real-scalar build) */
#endif
  MatStructure   matstruc;           /* SAME_/DIFFERENT_NONZERO_PATTERN between factorizations */
  PetscMPIInt    myid,size;          /* rank and size of the matrix communicator */
  PetscInt       *irn,*jcn,sym,nSolve;  /* triple row/col indices, MUMPS SYM value, count of solves done */
  PetscScalar    *val;               /* matrix values in triple (COO) format */
  MPI_Comm       comm_mumps;         /* duplicated communicator handed to MUMPS */
  VecScatter     scat_rhs, scat_sol; /* scatters: rhs to host, distributed solution back to PETSc layout */
  PetscTruth     isAIJ,CleanUpMUMPS; /* base type is (MPI)AIJ vs (MPI)SBAIJ; whether MUMPS data needs freeing */
  Vec            b_seq,x_seq;        /* sequential work vectors for centralized rhs / local solution */
  /* base-type methods saved at conversion time so they can be restored later */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);   /* type-specific destroy (AIJ vs SBAIJ variant) */
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);  /* saved composed preallocation fn */
} Mat_MUMPS;
51 
52 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
53 EXTERN_C_BEGIN
54 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
55 EXTERN_C_END
56 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
57 /*
58   input:
59     A       - matrix in mpiaij or mpisbaij (bs=1) format
60     shift   - 0: C style output triple; 1: Fortran style output triple.
61     valOnly - FALSE: spaces are allocated and values are set for the triple
62               TRUE:  only the values in v array are updated
63   output:
64     nnz     - dim of r, c, and v (number of local nonzero entries of A)
65     r, c, v - row and col index, matrix values (matrix triples)
66  */
67 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
68   PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
69   PetscErrorCode ierr;
70   PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
71   PetscInt       *row,*col;
72   PetscScalar    *av, *bv,*val;
73   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
74 
75   PetscFunctionBegin;
76   if (mumps->isAIJ){
77     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
78     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
79     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
80     nz = aa->nz + bb->nz;
81     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
82     garray = mat->garray;
83     av=aa->a; bv=bb->a;
84 
85   } else {
86     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
87     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
88     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
89     if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
90     nz = aa->nz + bb->nz;
91     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
92     garray = mat->garray;
93     av=aa->a; bv=bb->a;
94   }
95 
96   if (!valOnly){
97     ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr);
98     ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
99     ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
100     *r = row; *c = col; *v = val;
101   } else {
102     row = *r; col = *c; val = *v;
103   }
104   *nnz = nz;
105 
106   jj = 0; irow = rstart;
107   for ( i=0; i<m; i++ ) {
108     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
109     countA = ai[i+1] - ai[i];
110     countB = bi[i+1] - bi[i];
111     bjj = bj + bi[i];
112 
113     /* get jB, the starting local col index for the 2nd B-part */
114     colA_start = rstart + ajj[0]; /* the smallest col index for A */
115     j=-1;
116     do {
117       j++;
118       if (j == countB) break;
119       jcol = garray[bjj[j]];
120     } while (jcol < colA_start);
121     jB = j;
122 
123     /* B-part, smaller col index */
124     colA_start = rstart + ajj[0]; /* the smallest col index for A */
125     for (j=0; j<jB; j++){
126       jcol = garray[bjj[j]];
127       if (!valOnly){
128         row[jj] = irow + shift; col[jj] = jcol + shift;
129 
130       }
131       val[jj++] = *bv++;
132     }
133     /* A-part */
134     for (j=0; j<countA; j++){
135       if (!valOnly){
136         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
137       }
138       val[jj++] = *av++;
139     }
140     /* B-part, larger col index */
141     for (j=jB; j<countB; j++){
142       if (!valOnly){
143         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
144       }
145       val[jj++] = *bv++;
146     }
147     irow++;
148   }
149 
150   PetscFunctionReturn(0);
151 }
152 
153 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_MUMPS_Base"
/* Convert a *MUMPS matrix back to its base (AIJ/SBAIJ) type by restoring the
   method-table entries and composed functions that were saved at conversion
   time, and removing the MatConvert_* registrations added for MUMPS.
   With MAT_INITIAL_MATRIX the input is duplicated first; with
   MAT_REUSE_MATRIX the conversion happens in place on *newmat. */
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode ierr;
  Mat            B=*newmat;
  Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
  void           (*f)(void);

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* restore the base-type methods saved in the Mat_MUMPS context */
  B->ops->duplicate              = mumps->MatDuplicate;
  B->ops->view                   = mumps->MatView;
  B->ops->assemblyend            = mumps->MatAssemblyEnd;
  B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
  B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
  B->ops->destroy                = mumps->MatDestroy;

  /* put back original composed preallocation function */
  ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
  if (f) {
    ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr);
  }

  /* unregister the MUMPS conversion routines (composing PETSC_NULL removes them) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
193 EXTERN_C_END
194 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MUMPS"
/* Destroy a MUMPS matrix: terminate the MUMPS instance and free the interface
   data (if a factorization was performed), then convert the matrix back to its
   base type and invoke the base type's destroy routine. */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size=lu->size;
  PetscErrorCode (*specialdestroy)(Mat);
  PetscFunctionBegin;
  if (lu->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    if (size > 1){
      /* parallel case: free distributed-solution buffer, scatters, work vectors */
      ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr);
      ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
      ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
      ierr = PetscFree(lu->val);CHKERRQ(ierr);
    }
    /* JOB_END asks MUMPS to release all of its internal data for this instance */
    lu->id.job=JOB_END;
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    ierr = PetscFree(lu->irn);CHKERRQ(ierr);
    ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
    ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
  }
  /* save the pointer first: specialdestroy restores A->ops to the base type,
     after which A->ops->destroy below is the base type's destroy routine */
  specialdestroy = lu->specialdestroy;
  ierr = (*specialdestroy)(A);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "MatDestroy_AIJMUMPS"
232 PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
233 {
234   PetscErrorCode ierr;
235   PetscMPIInt    size;
236 
237   PetscFunctionBegin;
238   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
239   if (size==1) {
240     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
241   } else {
242     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
243   }
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
249 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
250 {
251   PetscErrorCode ierr;
252   PetscMPIInt    size;
253 
254   PetscFunctionBegin;
255   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
256   if (size==1) {
257     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
258   } else {
259     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
260   }
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatFactorInfo_MUMPS"
266 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
267   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
268   PetscErrorCode ierr;
269 
270   PetscFunctionBegin;
271   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
272   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
273   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
274   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):        %d \n",lu->id.ICNTL(1));CHKERRQ(ierr);
275   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr);
276   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):  %d \n",lu->id.ICNTL(3));CHKERRQ(ierr);
277   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
278   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
279   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
280   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
281   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scalling strategy):       %d \n",lu->id.ICNTL(8));CHKERRQ(ierr);
282   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
283   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
284   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
285   if (!lu->myid && lu->id.ICNTL(11)>0) {
286     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
287     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
288     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
289     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
290     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
291     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
292 
293   }
294   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
295   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
296   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
297   /* ICNTL(15-17) not used */
298   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
299   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Shur complement info):                       %d \n",lu->id.ICNTL(19));CHKERRQ(ierr);
300   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern):                         %d \n",lu->id.ICNTL(20));CHKERRQ(ierr);
301   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",lu->id.ICNTL(21));CHKERRQ(ierr);
302 
303   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
304   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
305   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
306   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",lu->id.CNTL(4));CHKERRQ(ierr);
307 
308   /* infomation local to each processor */
309   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);}
310   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
311   ierr = PetscSynchronizedFlush(A->comm);
312   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);}
313   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
314   ierr = PetscSynchronizedFlush(A->comm);
315   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);}
316   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
317   ierr = PetscSynchronizedFlush(A->comm);
318   /*
319   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);}
320   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr);
321   ierr = PetscSynchronizedFlush(A->comm);
322   */
323 
324   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);}
325   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr);
326   ierr = PetscSynchronizedFlush(A->comm);
327 
328   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);}
329   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr);
330   ierr = PetscSynchronizedFlush(A->comm);
331 
332   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);}
333   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr);
334   ierr = PetscSynchronizedFlush(A->comm);
335 
336   if (!lu->myid){ /* information from the host */
337     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
338     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
339     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
340 
341     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
342     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
343     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
344     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
345     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
346     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
347     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
348     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
349     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
350     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
351     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
352     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
353     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
354     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
355     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
356     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
357     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
358      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
359      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr);
360      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr);
361      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr);
362      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr);
363      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr);
364   }
365 
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "MatView_MUMPS"
371 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
372   PetscErrorCode    ierr;
373   PetscTruth        iascii;
374   PetscViewerFormat format;
375   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
376 
377   PetscFunctionBegin;
378   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
379 
380   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
381   if (iascii) {
382     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
383     if (format == PETSC_VIEWER_ASCII_INFO){
384       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
385     }
386   }
387   PetscFunctionReturn(0);
388 }
389 
#undef __FUNCT__
#define __FUNCT__ "MatSolve_AIJMUMPS"
/* Solve A x = b using the MUMPS factors (MUMPS job 3).
   MUMPS requires a centralized right-hand side on the host; in parallel the
   rhs is scattered to a sequential vector first, and the distributed solution
   (ICNTL(21)=1) is scattered back to the PETSc layout of x. */
PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar    *array;
  Vec            x_seq;
  IS             is_iden,is_petsc;
  /* NOTE(review): scat_sol is copied here but lu->scat_sol is used below;
     the local copy appears unused after this line */
  VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  lu->id.nrhs = 1;
  /* despite the name, x_seq here aliases the sequential RHS vector b_seq */
  x_seq = lu->b_seq;
  if (lu->size > 1){
    /* MUMPS only supports centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1 */
    /* MUMPS overwrites the rhs with the solution, so solve in place on x */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }
  if (lu->size == 1){
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  } else if (!lu->myid){
    ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
  }

  if (lu->size > 1){
    /* distributed solution */
    lu->id.ICNTL(21) = 1;
    if (!lu->nSolve){
      /* Create x_seq=sol_loc for repeated use */
      PetscInt    lsol_loc;
      PetscScalar *sol_loc;
      lsol_loc = lu->id.INFO(23); /* length of sol_loc */
      /* single allocation holding lsol_loc scalars followed by lsol_loc ints
         (isol_loc); freed as one block via lu->id.sol_loc in MatDestroy_MUMPS */
      ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr);
      lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
      lu->id.lsol_loc = lsol_loc;
      lu->id.sol_loc  = (F_DOUBLE *)sol_loc;
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr);
    }
  }

  /* solve phase */
  /*-------------*/
  lu->id.job = 3;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
  }

  if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (!lu->nSolve){ /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<lu->id.lsol_loc; i++){
        lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(is_petsc);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
    ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
  }
  lu->nSolve++;
  PetscFunctionReturn(0);
}
471 
472 /*
473   input:
474    F:        numeric factor
475   output:
476    nneg:     total number of negative pivots
477    nzero:    0
478    npos:     (global dimension of F) - nneg
479 */
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
483 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
484 {
485   Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
486   PetscErrorCode ierr;
487   PetscMPIInt    size;
488 
489   PetscFunctionBegin;
490   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
491   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
492   if (size > 1 && lu->id.ICNTL(13) != 1){
493     SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13));
494   }
495   if (nneg){
496     if (!lu->myid){
497       *nneg = lu->id.INFOG(12);
498     }
499     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
500   }
501   if (nzero) *nzero = 0;
502   if (npos)  *npos  = F->rmap.N - (*nneg);
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS"
508 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F)
509 {
510   Mat_MUMPS      *lu =(Mat_MUMPS*)(*F)->spptr;
511   Mat_MUMPS      *lua=(Mat_MUMPS*)(A)->spptr;
512   PetscErrorCode ierr;
513   PetscInt       rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl;
514   PetscTruth     valOnly,flg;
515   Mat            F_diag;
516 
517   PetscFunctionBegin;
518   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
519     (*F)->ops->solve    = MatSolve_AIJMUMPS;
520 
521     /* Initialize a MUMPS instance */
522     ierr = MPI_Comm_rank(A->comm, &lu->myid);
523     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
524     lua->myid = lu->myid; lua->size = lu->size;
525     lu->id.job = JOB_INIT;
526     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
527     ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr);
528 
529     /* Set mumps options */
530     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
531     lu->id.par=1;  /* host participates factorizaton and solve */
532     lu->id.sym=lu->sym;
533     if (lu->sym == 2){
534       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
535       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
536     }
537 #if defined(PETSC_USE_COMPLEX)
538     zmumps_c(&lu->id);
539 #else
540     dmumps_c(&lu->id);
541 #endif
542 
543     if (lu->size == 1){
544       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
545     } else {
546       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
547     }
548 
549     icntl=-1;
550     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
551     if ((flg && icntl > 0) || PetscLogPrintInfo) {
552       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
553     } else { /* no output */
554       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
555       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
556       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
557       lu->id.ICNTL(4) = 0;  /* level of printing, 0,1,2,3,4, default=2 */
558     }
559     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
560     icntl=-1;
561     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
562     if (flg) {
563       if (icntl== 1){
564         SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
565       } else {
566         lu->id.ICNTL(7) = icntl;
567       }
568     }
569     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
570     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
571     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
572     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
573     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
574     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
575     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
576 
577     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
578     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
579     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
580     ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);CHKERRQ(ierr);
581     PetscOptionsEnd();
582   }
583 
584   /* define matrix A */
585   switch (lu->id.ICNTL(18)){
586   case 0:  /* centralized assembled matrix input (size=1) */
587     if (!lu->myid) {
588       if (lua->isAIJ){
589         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
590         nz               = aa->nz;
591         ai = aa->i; aj = aa->j; lu->val = aa->a;
592       } else {
593         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
594         nz                  =  aa->nz;
595         ai = aa->i; aj = aa->j; lu->val = aa->a;
596       }
597       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
598         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
599         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
600         nz = 0;
601         for (i=0; i<M; i++){
602           rnz = ai[i+1] - ai[i];
603           while (rnz--) {  /* Fortran row/col index! */
604             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
605           }
606         }
607       }
608     }
609     break;
610   case 3:  /* distributed assembled matrix input (size>1) */
611     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
612       valOnly = PETSC_FALSE;
613     } else {
614       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
615     }
616     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
617     break;
618   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
619   }
620 
621   /* analysis phase */
622   /*----------------*/
623   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
624     lu->id.job = 1;
625 
626     lu->id.n = M;
627     switch (lu->id.ICNTL(18)){
628     case 0:  /* centralized assembled matrix input */
629       if (!lu->myid) {
630         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
631         if (lu->id.ICNTL(6)>1){
632 #if defined(PETSC_USE_COMPLEX)
633           lu->id.a = (mumps_double_complex*)lu->val;
634 #else
635           lu->id.a = lu->val;
636 #endif
637         }
638       }
639       break;
640     case 3:  /* distributed assembled matrix input (size>1) */
641       lu->id.nz_loc = nnz;
642       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
643       if (lu->id.ICNTL(6)>1) {
644 #if defined(PETSC_USE_COMPLEX)
645         lu->id.a_loc = (mumps_double_complex*)lu->val;
646 #else
647         lu->id.a_loc = lu->val;
648 #endif
649       }
650       /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
651       IS  is_iden;
652       Vec b;
653       if (!lu->myid){
654         ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr);
655         ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr);
656       } else {
657         ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr);
658         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
659       }
660       ierr = VecCreate(A->comm,&b);CHKERRQ(ierr);
661       ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr);
662       ierr = VecSetFromOptions(b);CHKERRQ(ierr);
663 
664       ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr);
665       ierr = ISDestroy(is_iden);CHKERRQ(ierr);
666       ierr = VecDestroy(b);CHKERRQ(ierr);
667       break;
668     }
669 #if defined(PETSC_USE_COMPLEX)
670     zmumps_c(&lu->id);
671 #else
672     dmumps_c(&lu->id);
673 #endif
674     if (lu->id.INFOG(1) < 0) {
675       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
676     }
677   }
678 
679   /* numerical factorization phase */
680   /*-------------------------------*/
681   lu->id.job = 2;
682   if(!lu->id.ICNTL(18)) {
683     if (!lu->myid) {
684 #if defined(PETSC_USE_COMPLEX)
685       lu->id.a = (mumps_double_complex*)lu->val;
686 #else
687       lu->id.a = lu->val;
688 #endif
689     }
690   } else {
691 #if defined(PETSC_USE_COMPLEX)
692     lu->id.a_loc = (mumps_double_complex*)lu->val;
693 #else
694     lu->id.a_loc = lu->val;
695 #endif
696   }
697 #if defined(PETSC_USE_COMPLEX)
698   zmumps_c(&lu->id);
699 #else
700   dmumps_c(&lu->id);
701 #endif
702   if (lu->id.INFOG(1) < 0) {
703     if (lu->id.INFO(1) == -13) {
704       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
705     } else {
706       SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
707     }
708   }
709 
710   if (!lu->myid && lu->id.ICNTL(16) > 0){
711     SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
712   }
713 
714   if (lu->size > 1){
715     if ((*F)->factor == FACTOR_LU){
716       F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
717     } else {
718       F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
719     }
720     F_diag->assembled = PETSC_TRUE;
721     if (lu->nSolve){
722       ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
723       ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
724       ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
725     }
726   }
727   (*F)->assembled   = PETSC_TRUE;
728   lu->matstruc      = SAME_NONZERO_PATTERN;
729   lu->CleanUpMUMPS  = PETSC_TRUE;
730   lu->nSolve        = 0;
731   PetscFunctionReturn(0);
732 }
733 
734 /* Note the Petsc r and c permutations are ignored */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
737 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
738   Mat            B;
739   Mat_MUMPS      *lu;
740   PetscErrorCode ierr;
741 
742   PetscFunctionBegin;
743   /* Create the factorization matrix */
744   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
745   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
746   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
747   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
748   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
749 
750   B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
751   B->factor               = FACTOR_LU;
752   lu                      = (Mat_MUMPS*)B->spptr;
753   lu->sym                 = 0;
754   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
755 
756   *F = B;
757   PetscFunctionReturn(0);
758 }
759 
760 /* Note the Petsc r permutation is ignored */
761 #undef __FUNCT__
762 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
763 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
764   Mat            B;
765   Mat_MUMPS      *lu;
766   PetscErrorCode ierr;
767 
768   PetscFunctionBegin;
769   /* Create the factorization matrix */
770   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
771   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
772   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
773   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
774   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
775 
776   B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
777   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
778   B->factor                     = FACTOR_CHOLESKY;
779   lu                            = (Mat_MUMPS*)B->spptr;
780   lu->sym                       = 2;
781   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
782 
783   *F = B;
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
789 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
790   PetscErrorCode ierr;
791   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
792 
793   PetscFunctionBegin;
794   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
795 
796   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
797   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
798   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
799   PetscFunctionReturn(0);
800 }
801 
802 EXTERN_C_BEGIN
803 #undef __FUNCT__
804 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
805 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
806 {
807   PetscErrorCode ierr;
808   PetscMPIInt    size;
809   MPI_Comm       comm;
810   Mat            B=*newmat;
811   Mat_MUMPS      *mumps;
812 
813   PetscFunctionBegin;
814   if (reuse == MAT_INITIAL_MATRIX) {
815     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
816   }
817 
818   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
819   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
820 
821   mumps->MatDuplicate              = A->ops->duplicate;
822   mumps->MatView                   = A->ops->view;
823   mumps->MatAssemblyEnd            = A->ops->assemblyend;
824   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
825   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
826   mumps->MatDestroy                = A->ops->destroy;
827   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
828   mumps->CleanUpMUMPS              = PETSC_FALSE;
829   mumps->isAIJ                     = PETSC_TRUE;
830 
831   B->spptr                         = (void*)mumps;
832   B->ops->duplicate                = MatDuplicate_MUMPS;
833   B->ops->view                     = MatView_MUMPS;
834   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
835   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
836   B->ops->destroy                  = MatDestroy_MUMPS;
837 
838   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
839   if (size == 1) {
840     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
841                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
842     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
843                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
844   } else {
845     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
846                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
847     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
848                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
849   }
850 
851   ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr);
852   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
853   *newmat = B;
854   PetscFunctionReturn(0);
855 }
856 EXTERN_C_END
857 
858 /*MC
859   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
860   and sequential matrices via the external package MUMPS.
861 
862   If MUMPS is installed (see the manual for instructions
863   on how to declare the existence of external packages),
864   a matrix type can be constructed which invokes MUMPS solvers.
865   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
866 
867   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
868   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
869   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
870   for communicators controlling multiple processes.  It is recommended that you call both of
871   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
872   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
873   without data copy.
874 
875   Options Database Keys:
876 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
877 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
878 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
879 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
880 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
881 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
882 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
883 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
884 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
885 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
886 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
887 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
888 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
889 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
. -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
- -mat_mumps_cntl_4 <sdelta> - value for static pivoting
891 
892   Level: beginner
893 
894 .seealso: MATSBAIJMUMPS
895 M*/
896 
897 EXTERN_C_BEGIN
898 #undef __FUNCT__
899 #define __FUNCT__ "MatCreate_AIJMUMPS"
900 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A)
901 {
902   PetscErrorCode ierr;
903   PetscMPIInt    size;
904 
905   PetscFunctionBegin;
906   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
907   if (size == 1) {
908     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
909   } else {
910     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
911     /*
912     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
913     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
914     */
915   }
916   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 EXTERN_C_END
920 
921 #undef __FUNCT__
922 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
923 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
924 {
925   PetscErrorCode ierr;
926   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
927 
928   PetscFunctionBegin;
929   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
930   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
931   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
932   PetscFunctionReturn(0);
933 }
934 
935 EXTERN_C_BEGIN
936 #undef __FUNCT__
937 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
938 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
939 {
940   Mat       A;
941   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
942   PetscErrorCode ierr;
943 
944   PetscFunctionBegin;
945   /*
946     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
947     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
948     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
949     block size info so that PETSc can determine the local size properly.  The block size info is set
950     in the preallocation routine.
951   */
952   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
953   A    = ((Mat_MPISBAIJ *)B->data)->A;
954   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 EXTERN_C_END
958 
959 EXTERN_C_BEGIN
960 #undef __FUNCT__
961 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
962 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
963 {
964   PetscErrorCode ierr;
965   PetscMPIInt    size;
966   MPI_Comm       comm;
967   Mat            B=*newmat;
968   Mat_MUMPS      *mumps;
969   void           (*f)(void);
970 
971   PetscFunctionBegin;
972   if (reuse == MAT_INITIAL_MATRIX) {
973     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
974   }
975 
976   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
977   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
978 
979   mumps->MatDuplicate              = A->ops->duplicate;
980   mumps->MatView                   = A->ops->view;
981   mumps->MatAssemblyEnd            = A->ops->assemblyend;
982   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
983   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
984   mumps->MatDestroy                = A->ops->destroy;
985   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
986   mumps->CleanUpMUMPS              = PETSC_FALSE;
987   mumps->isAIJ                     = PETSC_FALSE;
988 
989   B->spptr                         = (void*)mumps;
990   B->ops->duplicate                = MatDuplicate_MUMPS;
991   B->ops->view                     = MatView_MUMPS;
992   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
993   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
994   B->ops->destroy                  = MatDestroy_MUMPS;
995 
996   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
997   if (size == 1) {
998     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
999                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1000     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
1001                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1002   } else {
1003   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
1004     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
1005     if (f) { /* This case should always be true when this routine is called */
1006       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
1007       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1008                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
1009                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
1010     }
1011     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
1012                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1013     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
1014                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1015   }
1016 
1017   ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr);
1018   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
1019   *newmat = B;
1020   PetscFunctionReturn(0);
1021 }
1022 EXTERN_C_END
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatDuplicate_MUMPS"
1026 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
1027   PetscErrorCode ierr;
1028   Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;
1029 
1030   PetscFunctionBegin;
1031   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
1032   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 /*MC
1037   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
1038   distributed and sequential matrices via the external package MUMPS.
1039 
1040   If MUMPS is installed (see the manual for instructions
1041   on how to declare the existence of external packages),
1042   a matrix type can be constructed which invokes MUMPS solvers.
1043   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
1044 
1045   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
1046   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
1047   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
1048   for communicators controlling multiple processes.  It is recommended that you call both of
1049   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
1050   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
1051   without data copy.
1052 
1053   Options Database Keys:
1054 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
1055 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
1056 . -mat_mumps_icntl_4 <0,...,4> - print level
1057 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
1058 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
1059 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
1060 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
1061 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
1062 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
1063 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
1064 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
1065 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
1066 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
1067 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
. -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
- -mat_mumps_cntl_4 <sdelta> - value for static pivoting
1069 
1070   Level: beginner
1071 
1072 .seealso: MATAIJMUMPS
1073 M*/
1074 
1075 EXTERN_C_BEGIN
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
1078 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A)
1079 {
1080   PetscErrorCode ierr;
1081   PetscMPIInt    size;
1082 
1083   PetscFunctionBegin;
1084   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
1085   if (size == 1) {
1086     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1087   } else {
1088     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1089   }
1090   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 EXTERN_C_END
1094