xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 2f59403fd2df48e396e6efa2e07d12d9da4cd99d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Provides an interface to the MUMPS sparse solver
5 */
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "zmumps_c.h"
14 #else
15 #include "dmumps_c.h"
16 #endif
17 EXTERN_C_END
18 #define JOB_INIT -1
19 #define JOB_END -2
20 /* macros s.t. indices match MUMPS documentation */
21 #define ICNTL(I) icntl[(I)-1]
22 #define CNTL(I) cntl[(I)-1]
23 #define INFOG(I) infog[(I)-1]
24 #define INFO(I) info[(I)-1]
25 #define RINFOG(I) rinfog[(I)-1]
26 #define RINFO(I) rinfo[(I)-1]
27 
/* Per-matrix context attached to A->spptr for the MUMPS interface. */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;   /* MUMPS solver instance (complex-valued build) */
#else
  DMUMPS_STRUC_C id;   /* MUMPS solver instance (real-valued build) */
#endif
  MatStructure   matstruc;            /* DIFFERENT_NONZERO_PATTERN triggers full (re)analysis in the numeric factorization */
  PetscMPIInt    myid,size;           /* rank and size of A's communicator */
  PetscInt       *irn,*jcn,sym,nSolve; /* triple row/col indices, symmetry flag, count of solves performed */
  PetscScalar    *val;                /* triple values */
  MPI_Comm       comm_mumps;          /* duplicated communicator handed to MUMPS */
  VecScatter     scat_rhs, scat_sol;  /* scatters: distributed rhs -> centralized, distributed sol -> PETSc x */
  PetscTruth     isAIJ,CleanUpMUMPS;  /* AIJ vs SBAIJ flavor; whether destroy must terminate the MUMPS instance */
  Vec            b_seq,x_seq;         /* sequential work vectors (centralized rhs / local solution pieces) */
  /* function pointers of the base (non-MUMPS) type, saved so the convert-back routine can restore them */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);  /* type-specific destroy (AIJ or SBAIJ variant) */
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);  /* saved MatMPISBAIJSetPreallocation_C target */
} Mat_MUMPS;
51 
52 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
53 EXTERN_C_BEGIN
54 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
55 EXTERN_C_END
56 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
57 /*
58   input:
59     A       - matrix in mpiaij or mpisbaij (bs=1) format
60     shift   - 0: C style output triple; 1: Fortran style output triple.
61     valOnly - FALSE: spaces are allocated and values are set for the triple
62               TRUE:  only the values in v array are updated
63   output:
64     nnz     - dim of r, c, and v (number of local nonzero entries of A)
65     r, c, v - row and col index, matrix values (matrix triples)
66  */
67 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
68   PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
69   PetscErrorCode ierr;
70   PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
71   PetscInt       *row,*col;
72   PetscScalar    *av, *bv,*val;
73   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
74 
75   PetscFunctionBegin;
76   if (mumps->isAIJ){
77     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
78     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
79     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
80     nz = aa->nz + bb->nz;
81     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
82     garray = mat->garray;
83     av=aa->a; bv=bb->a;
84 
85   } else {
86     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
87     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
88     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
89     if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
90     nz = aa->nz + bb->nz;
91     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
92     garray = mat->garray;
93     av=aa->a; bv=bb->a;
94   }
95 
96   if (!valOnly){
97     ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr);
98     ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
99     ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
100     *r = row; *c = col; *v = val;
101   } else {
102     row = *r; col = *c; val = *v;
103   }
104   *nnz = nz;
105 
106   jj = 0; irow = rstart;
107   for ( i=0; i<m; i++ ) {
108     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
109     countA = ai[i+1] - ai[i];
110     countB = bi[i+1] - bi[i];
111     bjj = bj + bi[i];
112 
113     /* get jB, the starting local col index for the 2nd B-part */
114     colA_start = rstart + ajj[0]; /* the smallest col index for A */
115     j=-1;
116     do {
117       j++;
118       if (j == countB) break;
119       jcol = garray[bjj[j]];
120     } while (jcol < colA_start);
121     jB = j;
122 
123     /* B-part, smaller col index */
124     colA_start = rstart + ajj[0]; /* the smallest col index for A */
125     for (j=0; j<jB; j++){
126       jcol = garray[bjj[j]];
127       if (!valOnly){
128         row[jj] = irow + shift; col[jj] = jcol + shift;
129 
130       }
131       val[jj++] = *bv++;
132     }
133     /* A-part */
134     for (j=0; j<countA; j++){
135       if (!valOnly){
136         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
137       }
138       val[jj++] = *av++;
139     }
140     /* B-part, larger col index */
141     for (j=jB; j<countB; j++){
142       if (!valOnly){
143         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
144       }
145       val[jj++] = *bv++;
146     }
147     irow++;
148   }
149 
150   PetscFunctionReturn(0);
151 }
152 
153 EXTERN_C_BEGIN
154 #undef __FUNCT__
155 #define __FUNCT__ "MatConvert_MUMPS_Base"
156 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat) \
157 {
158   PetscErrorCode ierr;
159   Mat            B=*newmat;
160   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
161   void           (*f)(void);
162 
163   PetscFunctionBegin;
164   if (reuse == MAT_INITIAL_MATRIX) {
165     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
166   }
167   B->ops->duplicate              = mumps->MatDuplicate;
168   B->ops->view                   = mumps->MatView;
169   B->ops->assemblyend            = mumps->MatAssemblyEnd;
170   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
171   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
172   B->ops->destroy                = mumps->MatDestroy;
173 
174   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
175   if (f) {
176     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr);
177   }
178 
179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
180   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
181   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
183   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
184   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
185   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
186   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
187 
188   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
189   *newmat = B;
190   PetscFunctionReturn(0);
191 }
192 EXTERN_C_END
193 
194 #undef __FUNCT__
195 #define __FUNCT__ "MatDestroy_MUMPS"
196 PetscErrorCode MatDestroy_MUMPS(Mat A)
197 {
198   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
199   PetscErrorCode ierr;
200   PetscMPIInt    size=lu->size;
201   PetscErrorCode (*specialdestroy)(Mat);
202   PetscFunctionBegin;
203   if (lu->CleanUpMUMPS) {
204     /* Terminate instance, deallocate memories */
205     if (size > 1){
206       ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
207       ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr);
208       ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr);
209       ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
210       ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
211       ierr = PetscFree(lu->val);CHKERRQ(ierr);
212     }
213     lu->id.job=JOB_END;
214 #if defined(PETSC_USE_COMPLEX)
215     zmumps_c(&lu->id);
216 #else
217     dmumps_c(&lu->id);
218 #endif
219     ierr = PetscFree(lu->irn);CHKERRQ(ierr);
220     ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
221     ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
222   }
223   specialdestroy = lu->specialdestroy;
224   ierr = (*specialdestroy)(A);CHKERRQ(ierr);
225   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
226   PetscFunctionReturn(0);
227 }
228 
229 #undef __FUNCT__
230 #define __FUNCT__ "MatDestroy_AIJMUMPS"
231 PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
232 {
233   PetscErrorCode ierr;
234   PetscMPIInt    size;
235 
236   PetscFunctionBegin;
237   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
238   if (size==1) {
239     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
240   } else {
241     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
242   }
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
248 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
249 {
250   PetscErrorCode ierr;
251   PetscMPIInt    size;
252 
253   PetscFunctionBegin;
254   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
255   if (size==1) {
256     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
257   } else {
258     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
259   }
260   PetscFunctionReturn(0);
261 }
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "MatFactorInfo_MUMPS"
265 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
266   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
267   PetscErrorCode ierr;
268 
269   PetscFunctionBegin;
270   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
271   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
272   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
273   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):        %d \n",lu->id.ICNTL(1));CHKERRQ(ierr);
274   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr);
275   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):  %d \n",lu->id.ICNTL(3));CHKERRQ(ierr);
276   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
277   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
278   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
279   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
280   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scalling strategy):       %d \n",lu->id.ICNTL(8));CHKERRQ(ierr);
281   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
282   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
283   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
284   if (!lu->myid && lu->id.ICNTL(11)>0) {
285     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
286     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
287     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
288     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
289     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
290     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
291 
292   }
293   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
294   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
295   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
296   /* ICNTL(15-17) not used */
297   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
298   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Shur complement info):                       %d \n",lu->id.ICNTL(19));CHKERRQ(ierr);
299   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern):                         %d \n",lu->id.ICNTL(20));CHKERRQ(ierr);
300   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",lu->id.ICNTL(21));CHKERRQ(ierr);
301 
302   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
303   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
304   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
305   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",lu->id.CNTL(4));CHKERRQ(ierr);
306 
307   /* infomation local to each processor */
308   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);}
309   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
310   ierr = PetscSynchronizedFlush(A->comm);
311   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);}
312   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
313   ierr = PetscSynchronizedFlush(A->comm);
314   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);}
315   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
316   ierr = PetscSynchronizedFlush(A->comm);
317   /*
318   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);}
319   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr);
320   ierr = PetscSynchronizedFlush(A->comm);
321   */
322 
323   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);}
324   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr);
325   ierr = PetscSynchronizedFlush(A->comm);
326 
327   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);}
328   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr);
329   ierr = PetscSynchronizedFlush(A->comm);
330 
331   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);}
332   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr);
333   ierr = PetscSynchronizedFlush(A->comm);
334 
335   if (!lu->myid){ /* information from the host */
336     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
337     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
338     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
339 
340     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
341     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
342     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
343     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
344     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
345     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
346     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
347     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
348     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
349     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
350     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
351     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
352     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
353     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
354     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
355     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
356     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
357      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
358      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr);
359      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr);
360      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr);
361      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr);
362      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr);
363   }
364 
365   PetscFunctionReturn(0);
366 }
367 
368 #undef __FUNCT__
369 #define __FUNCT__ "MatView_MUMPS"
370 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
371   PetscErrorCode    ierr;
372   PetscTruth        iascii;
373   PetscViewerFormat format;
374   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
375 
376   PetscFunctionBegin;
377   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
378 
379   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
380   if (iascii) {
381     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
382     if (format == PETSC_VIEWER_ASCII_INFO){
383       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
384     }
385   }
386   PetscFunctionReturn(0);
387 }
388 
#undef __FUNCT__
#define __FUNCT__ "MatSolve_AIJMUMPS"
/* Solve A x = b using the MUMPS factors kept in A->spptr.
   MUMPS requires a centralized rhs on the host (rank 0).  In parallel runs the
   solution is returned distributed (ICNTL(21)=1) and scattered into x; the
   scatter and local-solution work vector are built on the first solve and
   reused afterwards (lu->nSolve counts completed solves). */
PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscScalar    *array;
  Vec            x_seq;
  IS             is_iden,is_petsc;
  /* NOTE(review): scat_sol is copied here but only lu->scat_sol is used below */
  VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;
  PetscErrorCode ierr;
  PetscInt       i;

  PetscFunctionBegin;
  lu->id.nrhs = 1;
  x_seq = lu->b_seq;  /* despite the name, x_seq holds the centralized rhs here */
  if (lu->size > 1){
    /* MUMPS only supports centralized rhs. Scatter b into a sequential rhs vector */
    ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
    if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
  } else {  /* size == 1: solve in place over a copy of b held in x */
    ierr = VecCopy(b,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&array);CHKERRQ(ierr);
  }
  if (!lu->myid) { /* define rhs on the host */
#if defined(PETSC_USE_COMPLEX)
    lu->id.rhs = (mumps_double_complex*)array;
#else
    lu->id.rhs = array;
#endif
  }
  if (lu->size == 1){
    ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
  } else if (!lu->myid){
    ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
  }

  if (lu->size > 1){
    /* distributed solution */
    lu->id.ICNTL(21) = 1;
    if (!lu->nSolve){
      /* Create x_seq=sol_loc for repeated use */
      PetscInt    lsol_loc;
      PetscScalar *sol_loc;
      lsol_loc = lu->id.INFO(23); /* length of sol_loc */
      /* single allocation carries both the value array and the trailing isol_loc index array */
      ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr);
      lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
      lu->id.lsol_loc = lsol_loc;
      lu->id.sol_loc  = (F_DOUBLE *)sol_loc;
      ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr);
    }
  }

  /* solve phase */
  /*-------------*/
  lu->id.job = 3;
#if defined(PETSC_USE_COMPLEX)
  zmumps_c(&lu->id);
#else
  dmumps_c(&lu->id);
#endif
  if (lu->id.INFOG(1) < 0) {
    SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
  }

  if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
    if (!lu->nSolve){ /* create scatter scat_sol */
      ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
      for (i=0; i<lu->id.lsol_loc; i++){
        lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
      }
      ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr);  /* to */
      ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr);
      ierr = ISDestroy(is_iden);CHKERRQ(ierr);
      ierr = ISDestroy(is_petsc);CHKERRQ(ierr);
    }
    ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
    ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
  }
  lu->nSolve++;
  PetscFunctionReturn(0);
}
470 
471 /*
472   input:
473    F:        numeric factor
474   output:
475    nneg:     total number of negative pivots
476    nzero:    0
477    npos:     (global dimension of F) - nneg
478 */
479 
480 #undef __FUNCT__
481 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
482 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
483 {
484   Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
485   PetscErrorCode ierr;
486   PetscMPIInt    size;
487 
488   PetscFunctionBegin;
489   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
490   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
491   if (size > 1 && lu->id.ICNTL(13) != 1){
492     SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13));
493   }
494   if (nneg){
495     if (!lu->myid){
496       *nneg = lu->id.INFOG(12);
497     }
498     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
499   }
500   if (nzero) *nzero = 0;
501   if (npos)  *npos  = F->rmap.N - (*nneg);
502   PetscFunctionReturn(0);
503 }
504 
505 #undef __FUNCT__
506 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS"
507 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F)
508 {
509   Mat_MUMPS      *lu =(Mat_MUMPS*)(*F)->spptr;
510   Mat_MUMPS      *lua=(Mat_MUMPS*)(A)->spptr;
511   PetscErrorCode ierr;
512   PetscInt       rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl;
513   PetscTruth     valOnly,flg;
514   Mat            F_diag;
515 
516   PetscFunctionBegin;
517   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
518     (*F)->ops->solve    = MatSolve_AIJMUMPS;
519 
520     /* Initialize a MUMPS instance */
521     ierr = MPI_Comm_rank(A->comm, &lu->myid);
522     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
523     lua->myid = lu->myid; lua->size = lu->size;
524     lu->id.job = JOB_INIT;
525     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
526     ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr);
527 
528     /* Set mumps options */
529     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
530     lu->id.par=1;  /* host participates factorizaton and solve */
531     lu->id.sym=lu->sym;
532     if (lu->sym == 2){
533       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
534       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
535     }
536 #if defined(PETSC_USE_COMPLEX)
537     zmumps_c(&lu->id);
538 #else
539     dmumps_c(&lu->id);
540 #endif
541 
542     if (lu->size == 1){
543       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
544     } else {
545       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
546     }
547 
548     icntl=-1;
549     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
550     if ((flg && icntl > 0) || PetscLogPrintInfo) {
551       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
552     } else { /* no output */
553       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
554       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
555       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
556       lu->id.ICNTL(4) = 0;  /* level of printing, 0,1,2,3,4, default=2 */
557     }
558     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
559     icntl=-1;
560     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
561     if (flg) {
562       if (icntl== 1){
563         SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
564       } else {
565         lu->id.ICNTL(7) = icntl;
566       }
567     }
568     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
569     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
570     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
571     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
572     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
573     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
574     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
575 
576     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
577     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
578     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
579     PetscOptionsEnd();
580   }
581 
582   /* define matrix A */
583   switch (lu->id.ICNTL(18)){
584   case 0:  /* centralized assembled matrix input (size=1) */
585     if (!lu->myid) {
586       if (lua->isAIJ){
587         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
588         nz               = aa->nz;
589         ai = aa->i; aj = aa->j; lu->val = aa->a;
590       } else {
591         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
592         nz                  =  aa->nz;
593         ai = aa->i; aj = aa->j; lu->val = aa->a;
594       }
595       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
596         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
597         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
598         nz = 0;
599         for (i=0; i<M; i++){
600           rnz = ai[i+1] - ai[i];
601           while (rnz--) {  /* Fortran row/col index! */
602             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
603           }
604         }
605       }
606     }
607     break;
608   case 3:  /* distributed assembled matrix input (size>1) */
609     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
610       valOnly = PETSC_FALSE;
611     } else {
612       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
613     }
614     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
615     break;
616   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
617   }
618 
619   /* analysis phase */
620   /*----------------*/
621   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
622     lu->id.job = 1;
623 
624     lu->id.n = M;
625     switch (lu->id.ICNTL(18)){
626     case 0:  /* centralized assembled matrix input */
627       if (!lu->myid) {
628         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
629         if (lu->id.ICNTL(6)>1){
630 #if defined(PETSC_USE_COMPLEX)
631           lu->id.a = (mumps_double_complex*)lu->val;
632 #else
633           lu->id.a = lu->val;
634 #endif
635         }
636       }
637       break;
638     case 3:  /* distributed assembled matrix input (size>1) */
639       lu->id.nz_loc = nnz;
640       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
641       if (lu->id.ICNTL(6)>1) {
642 #if defined(PETSC_USE_COMPLEX)
643         lu->id.a_loc = (mumps_double_complex*)lu->val;
644 #else
645         lu->id.a_loc = lu->val;
646 #endif
647       }
648       /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
649       IS  is_iden;
650       Vec b;
651       if (!lu->myid){
652         ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr);
653         ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr);
654       } else {
655         ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr);
656         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
657       }
658       ierr = VecCreate(A->comm,&b);CHKERRQ(ierr);
659       ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr);
660       ierr = VecSetFromOptions(b);CHKERRQ(ierr);
661 
662       ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr);
663       ierr = ISDestroy(is_iden);CHKERRQ(ierr);
664       ierr = VecDestroy(b);CHKERRQ(ierr);
665       break;
666     }
667 #if defined(PETSC_USE_COMPLEX)
668     zmumps_c(&lu->id);
669 #else
670     dmumps_c(&lu->id);
671 #endif
672     if (lu->id.INFOG(1) < 0) {
673       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
674     }
675   }
676 
677   /* numerical factorization phase */
678   /*-------------------------------*/
679   lu->id.job = 2;
680   if(!lu->id.ICNTL(18)) {
681     if (!lu->myid) {
682 #if defined(PETSC_USE_COMPLEX)
683       lu->id.a = (mumps_double_complex*)lu->val;
684 #else
685       lu->id.a = lu->val;
686 #endif
687     }
688   } else {
689 #if defined(PETSC_USE_COMPLEX)
690     lu->id.a_loc = (mumps_double_complex*)lu->val;
691 #else
692     lu->id.a_loc = lu->val;
693 #endif
694   }
695 #if defined(PETSC_USE_COMPLEX)
696   zmumps_c(&lu->id);
697 #else
698   dmumps_c(&lu->id);
699 #endif
700   if (lu->id.INFOG(1) < 0) {
701     if (lu->id.INFO(1) == -13) {
702       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
703     } else {
704       SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
705     }
706   }
707 
708   if (!lu->myid && lu->id.ICNTL(16) > 0){
709     SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
710   }
711 
712   if (lu->size > 1){
713     if ((*F)->factor == FACTOR_LU){
714       F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
715     } else {
716       F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
717     }
718     F_diag->assembled = PETSC_TRUE;
719     if (lu->nSolve){
720       ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
721       ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
722       ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
723     }
724   }
725   (*F)->assembled   = PETSC_TRUE;
726   lu->matstruc      = SAME_NONZERO_PATTERN;
727   lu->CleanUpMUMPS  = PETSC_TRUE;
728   lu->nSolve        = 0;
729   PetscFunctionReturn(0);
730 }
731 
732 /* Note the Petsc r and c permutations are ignored */
733 #undef __FUNCT__
734 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
735 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
736   Mat            B;
737   Mat_MUMPS      *lu;
738   PetscErrorCode ierr;
739 
740   PetscFunctionBegin;
741   /* Create the factorization matrix */
742   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
743   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
744   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
745   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
746   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
747 
748   B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
749   B->factor               = FACTOR_LU;
750   lu                      = (Mat_MUMPS*)B->spptr;
751   lu->sym                 = 0;
752   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
753 
754   *F = B;
755   PetscFunctionReturn(0);
756 }
757 
758 /* Note the Petsc r permutation is ignored */
759 #undef __FUNCT__
760 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
761 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
762   Mat            B;
763   Mat_MUMPS      *lu;
764   PetscErrorCode ierr;
765 
766   PetscFunctionBegin;
767   /* Create the factorization matrix */
768   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
769   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
770   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
771   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
772   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
773 
774   B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
775   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
776   B->factor                     = FACTOR_CHOLESKY;
777   lu                            = (Mat_MUMPS*)B->spptr;
778   lu->sym                       = 2;
779   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
780 
781   *F = B;
782   PetscFunctionReturn(0);
783 }
784 
785 #undef __FUNCT__
786 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
787 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
788   PetscErrorCode ierr;
789   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
790 
791   PetscFunctionBegin;
792   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
793 
794   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
795   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
796   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
797   PetscFunctionReturn(0);
798 }
799 
800 EXTERN_C_BEGIN
801 #undef __FUNCT__
802 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
803 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
804 {
805   PetscErrorCode ierr;
806   PetscMPIInt    size;
807   MPI_Comm       comm;
808   Mat            B=*newmat;
809   Mat_MUMPS      *mumps;
810 
811   PetscFunctionBegin;
812   if (reuse == MAT_INITIAL_MATRIX) {
813     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
814   }
815 
816   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
817   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
818 
819   mumps->MatDuplicate              = A->ops->duplicate;
820   mumps->MatView                   = A->ops->view;
821   mumps->MatAssemblyEnd            = A->ops->assemblyend;
822   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
823   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
824   mumps->MatDestroy                = A->ops->destroy;
825   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
826   mumps->CleanUpMUMPS              = PETSC_FALSE;
827   mumps->isAIJ                     = PETSC_TRUE;
828 
829   B->spptr                         = (void*)mumps;
830   B->ops->duplicate                = MatDuplicate_MUMPS;
831   B->ops->view                     = MatView_MUMPS;
832   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
833   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
834   B->ops->destroy                  = MatDestroy_MUMPS;
835 
836   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
837   if (size == 1) {
838     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
839                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
840     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
841                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
842   } else {
843     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
844                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
845     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
846                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
847   }
848 
849   ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr);
850   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
851   *newmat = B;
852   PetscFunctionReturn(0);
853 }
854 EXTERN_C_END
855 
856 /*MC
857   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
858   and sequential matrices via the external package MUMPS.
859 
860   If MUMPS is installed (see the manual for instructions
861   on how to declare the existence of external packages),
862   a matrix type can be constructed which invokes MUMPS solvers.
863   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
864 
865   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
866   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
867   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
868   for communicators controlling multiple processes.  It is recommended that you call both of
869   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
870   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
871   without data copy.
872 
873   Options Database Keys:
874 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
875 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
. -mat_mumps_icntl_4 <0,...,4> - print level
877 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
878 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
879 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
880 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
881 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
882 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
883 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
884 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
885 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
886 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
887 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
888 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
889 
890   Level: beginner
891 
892 .seealso: MATSBAIJMUMPS
893 M*/
894 
895 EXTERN_C_BEGIN
896 #undef __FUNCT__
897 #define __FUNCT__ "MatCreate_AIJMUMPS"
898 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A)
899 {
900   PetscErrorCode ierr;
901   PetscMPIInt    size;
902   MPI_Comm       comm;
903 
904   PetscFunctionBegin;
905   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
906   /*   and AIJMUMPS types */
907   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr);
908   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
909   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
910   if (size == 1) {
911     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
912   } else {
913     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
914     /*
915     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
916     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
917     */
918   }
919   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
920   PetscFunctionReturn(0);
921 }
922 EXTERN_C_END
923 
924 #undef __FUNCT__
925 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
926 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
927 {
928   PetscErrorCode ierr;
929   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
930 
931   PetscFunctionBegin;
932   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
933   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
934   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
935   PetscFunctionReturn(0);
936 }
937 
938 EXTERN_C_BEGIN
939 #undef __FUNCT__
940 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
941 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
942 {
943   Mat       A;
944   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
945   PetscErrorCode ierr;
946 
947   PetscFunctionBegin;
948   /*
949     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
950     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
951     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
952     block size info so that PETSc can determine the local size properly.  The block size info is set
953     in the preallocation routine.
954   */
955   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
956   A    = ((Mat_MPISBAIJ *)B->data)->A;
957   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
958   PetscFunctionReturn(0);
959 }
960 EXTERN_C_END
961 
962 EXTERN_C_BEGIN
963 #undef __FUNCT__
964 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
965 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
966 {
967   PetscErrorCode ierr;
968   PetscMPIInt    size;
969   MPI_Comm       comm;
970   Mat            B=*newmat;
971   Mat_MUMPS      *mumps;
972   void           (*f)(void);
973 
974   PetscFunctionBegin;
975   if (reuse == MAT_INITIAL_MATRIX) {
976     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
977   }
978 
979   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
980   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
981 
982   mumps->MatDuplicate              = A->ops->duplicate;
983   mumps->MatView                   = A->ops->view;
984   mumps->MatAssemblyEnd            = A->ops->assemblyend;
985   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
986   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
987   mumps->MatDestroy                = A->ops->destroy;
988   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
989   mumps->CleanUpMUMPS              = PETSC_FALSE;
990   mumps->isAIJ                     = PETSC_FALSE;
991 
992   B->spptr                         = (void*)mumps;
993   B->ops->duplicate                = MatDuplicate_MUMPS;
994   B->ops->view                     = MatView_MUMPS;
995   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
996   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
997   B->ops->destroy                  = MatDestroy_MUMPS;
998 
999   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
1000   if (size == 1) {
1001     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
1002                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1003     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
1004                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1005   } else {
1006   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
1007     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
1008     if (f) { /* This case should always be true when this routine is called */
1009       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
1010       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1011                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
1012                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
1013     }
1014     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
1015                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1016     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
1017                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1018   }
1019 
1020   ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr);
1021   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
1022   *newmat = B;
1023   PetscFunctionReturn(0);
1024 }
1025 EXTERN_C_END
1026 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MUMPS"
/* Duplicates A via the saved base duplicate routine, then copies the Mat_MUMPS
   context into the new matrix's spptr.  NOTE(review): this is a shallow struct
   copy — pointer members of Mat_MUMPS (irn, jcn, val, scatters, MUMPS id) are
   shared with A, not cloned; presumably the duplicate is meant to be used
   before factorization state exists — verify against callers. */
PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
  PetscErrorCode ierr;
  Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;

  PetscFunctionBegin;
  /* base duplicate creates *M (including its own spptr) */
  ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
  /* overwrite the duplicate's context with a word-for-word copy of A's */
  ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1038 
1039 /*MC
1040   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
1041   distributed and sequential matrices via the external package MUMPS.
1042 
1043   If MUMPS is installed (see the manual for instructions
1044   on how to declare the existence of external packages),
1045   a matrix type can be constructed which invokes MUMPS solvers.
1046   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
1047 
1048   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
1049   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
1050   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
1051   for communicators controlling multiple processes.  It is recommended that you call both of
1052   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
1053   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
1054   without data copy.
1055 
1056   Options Database Keys:
1057 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
1058 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
1059 . -mat_mumps_icntl_4 <0,...,4> - print level
1060 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
1061 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
1062 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
1063 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
1064 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
1065 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
1066 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
1067 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
1068 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
1069 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
1070 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
1071 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
1072 
1073   Level: beginner
1074 
1075 .seealso: MATAIJMUMPS
1076 M*/
1077 
1078 EXTERN_C_BEGIN
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
1081 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A)
1082 {
1083   PetscErrorCode ierr;
1084   PetscMPIInt    size;
1085 
1086   PetscFunctionBegin;
1087   /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */
1088   /*   and SBAIJMUMPS types */
1089   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr);
1090   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
1091   if (size == 1) {
1092     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1093   } else {
1094     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1095   }
1096   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
1097   PetscFunctionReturn(0);
1098 }
1099 EXTERN_C_END
1100