xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision a98ce0f41cecbcf583ea61eee7d4bfcfd656d38d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Provides an interface to the MUMPS sparse solver
5 */
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #include "src/mat/impls/sbaij/seq/sbaij.h"
9 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "zmumps_c.h"
14 #else
15 #include "dmumps_c.h"
16 #endif
17 EXTERN_C_END
18 #define JOB_INIT -1
19 #define JOB_END -2
20 /* macros s.t. indices match MUMPS documentation */
21 #define ICNTL(I) icntl[(I)-1]
22 #define CNTL(I) cntl[(I)-1]
23 #define INFOG(I) infog[(I)-1]
24 #define INFO(I) info[(I)-1]
25 #define RINFOG(I) rinfog[(I)-1]
26 #define RINFO(I) rinfo[(I)-1]
27 
/* Private interface context attached to a matrix of type xxxMUMPS (A->spptr) */
typedef struct {
#if defined(PETSC_USE_COMPLEX)
  ZMUMPS_STRUC_C id;                  /* MUMPS control/state structure (complex arithmetic) */
#else
  DMUMPS_STRUC_C id;                  /* MUMPS control/state structure (real arithmetic) */
#endif
  MatStructure   matstruc;            /* SAME/DIFFERENT_NONZERO_PATTERN: whether analysis can be reused */
  PetscMPIInt    myid,size;           /* rank and size of the matrix communicator */
  PetscInt       *irn,*jcn,sym,nSolve; /* triple row/col indices; symmetry flag; # of solves performed */
  PetscScalar    *val;                /* matrix values of the triples */
  MPI_Comm       comm_mumps;          /* duplicated communicator handed to MUMPS */
  VecScatter     scat_rhs, scat_sol;  /* scatters: distributed rhs -> centralized; local sol -> distributed */
  PetscTruth     isAIJ,CleanUpMUMPS;  /* input format flag; whether MUMPS state must be freed on destroy */
  Vec            b_seq,x_seq;         /* centralized rhs; wrapper around MUMPS's local solution */
  /* saved methods of the original matrix type, restored by MatConvert_MUMPS_Base() */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);
  PetscErrorCode (*specialdestroy)(Mat);  /* per-format destroy (AIJ vs SBAIJ) chained from MatDestroy_MUMPS() */
  PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
} Mat_MUMPS;
51 
52 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
53 EXTERN_C_BEGIN
54 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat,MatType,MatReuse,Mat*);
55 EXTERN_C_END
56 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
57 /*
58   input:
59     A       - matrix in mpiaij or mpisbaij (bs=1) format
60     shift   - 0: C style output triple; 1: Fortran style output triple.
61     valOnly - FALSE: spaces are allocated and values are set for the triple
62               TRUE:  only the values in v array are updated
63   output:
64     nnz     - dim of r, c, and v (number of local nonzero entries of A)
65     r, c, v - row and col index, matrix values (matrix triples)
66  */
67 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
68   PetscInt       *ai, *aj, *bi, *bj, rstart,nz, *garray;
69   PetscErrorCode ierr;
70   PetscInt       i,j,jj,jB,irow,m=A->rmap.n,*ajj,*bjj,countA,countB,colA_start,jcol;
71   PetscInt       *row,*col;
72   PetscScalar    *av, *bv,*val;
73   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
74 
75   PetscFunctionBegin;
76   if (mumps->isAIJ){
77     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
78     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
79     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
80     nz = aa->nz + bb->nz;
81     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
82     garray = mat->garray;
83     av=aa->a; bv=bb->a;
84 
85   } else {
86     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
87     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
88     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
89     if (A->rmap.bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->rmap.bs);
90     nz = aa->nz + bb->nz;
91     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= A->rmap.rstart;
92     garray = mat->garray;
93     av=aa->a; bv=bb->a;
94   }
95 
96   if (!valOnly){
97     ierr = PetscMalloc(nz*sizeof(PetscInt) ,&row);CHKERRQ(ierr);
98     ierr = PetscMalloc(nz*sizeof(PetscInt),&col);CHKERRQ(ierr);
99     ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
100     *r = row; *c = col; *v = val;
101   } else {
102     row = *r; col = *c; val = *v;
103   }
104   *nnz = nz;
105 
106   jj = 0; irow = rstart;
107   for ( i=0; i<m; i++ ) {
108     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
109     countA = ai[i+1] - ai[i];
110     countB = bi[i+1] - bi[i];
111     bjj = bj + bi[i];
112 
113     /* get jB, the starting local col index for the 2nd B-part */
114     colA_start = rstart + ajj[0]; /* the smallest col index for A */
115     j=-1;
116     do {
117       j++;
118       if (j == countB) break;
119       jcol = garray[bjj[j]];
120     } while (jcol < colA_start);
121     jB = j;
122 
123     /* B-part, smaller col index */
124     colA_start = rstart + ajj[0]; /* the smallest col index for A */
125     for (j=0; j<jB; j++){
126       jcol = garray[bjj[j]];
127       if (!valOnly){
128         row[jj] = irow + shift; col[jj] = jcol + shift;
129 
130       }
131       val[jj++] = *bv++;
132     }
133     /* A-part */
134     for (j=0; j<countA; j++){
135       if (!valOnly){
136         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
137       }
138       val[jj++] = *av++;
139     }
140     /* B-part, larger col index */
141     for (j=jB; j<countB; j++){
142       if (!valOnly){
143         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
144       }
145       val[jj++] = *bv++;
146     }
147     irow++;
148   }
149 
150   PetscFunctionReturn(0);
151 }
152 
153 EXTERN_C_BEGIN
154 #undef __FUNCT__
155 #define __FUNCT__ "MatConvert_MUMPS_Base"
156 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_MUMPS_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
157 {
158   PetscErrorCode ierr;
159   Mat            B=*newmat;
160   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
161   void           (*f)(void);
162 
163   PetscFunctionBegin;
164   if (reuse == MAT_INITIAL_MATRIX) {
165     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
166   }
167   B->ops->duplicate              = mumps->MatDuplicate;
168   B->ops->view                   = mumps->MatView;
169   B->ops->assemblyend            = mumps->MatAssemblyEnd;
170   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
171   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
172   B->ops->destroy                = mumps->MatDestroy;
173 
174   /* put back original composed preallocation function */
175   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
176   if (f) {
177     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(PetscVoidFunction)mumps->MatPreallocate);CHKERRQ(ierr);
178   }
179 
180   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
181   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
183   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
184   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
185   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
186   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
187   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
188 
189   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
190   *newmat = B;
191   PetscFunctionReturn(0);
192 }
193 EXTERN_C_END
194 
195 #undef __FUNCT__
196 #define __FUNCT__ "MatDestroy_MUMPS"
/* Destroy a MUMPS-wrapped matrix: release MUMPS state (if any was created),
   then chain to the format-specific destroy and the restored base destroy. */
PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
  PetscErrorCode ierr;
  PetscMPIInt    size=lu->size;
  PetscErrorCode (*specialdestroy)(Mat);
  PetscFunctionBegin;
  if (lu->CleanUpMUMPS) {
    /* Terminate instance, deallocate memories */
    if (size > 1){
      /* parallel case: the centralized rhs/solution vectors and their scatters
         were created in the factor/solve routines */
      ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_rhs);CHKERRQ(ierr);
      ierr = VecDestroy(lu->b_seq);CHKERRQ(ierr);
      ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
      ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
      ierr = PetscFree(lu->val);CHKERRQ(ierr);
    }
    /* tell MUMPS to free its internal data for this instance */
    lu->id.job=JOB_END;
#if defined(PETSC_USE_COMPLEX)
    zmumps_c(&lu->id);
#else
    dmumps_c(&lu->id);
#endif
    ierr = PetscFree(lu->irn);CHKERRQ(ierr);
    ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
    ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
  }
  /* specialdestroy converts A back to its base type (resetting A->ops->destroy),
     so it must run before the final destroy call below */
  specialdestroy = lu->specialdestroy;
  ierr = (*specialdestroy)(A);CHKERRQ(ierr);
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
229 
230 #undef __FUNCT__
231 #define __FUNCT__ "MatDestroy_AIJMUMPS"
232 PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
233 {
234   PetscErrorCode ierr;
235   PetscMPIInt    size;
236 
237   PetscFunctionBegin;
238   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
239   if (size==1) {
240     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
241   } else {
242     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
243   }
244   PetscFunctionReturn(0);
245 }
246 
247 #undef __FUNCT__
248 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
249 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
250 {
251   PetscErrorCode ierr;
252   PetscMPIInt    size;
253 
254   PetscFunctionBegin;
255   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
256   if (size==1) {
257     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
258   } else {
259     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
260   }
261   PetscFunctionReturn(0);
262 }
263 
264 #undef __FUNCT__
265 #define __FUNCT__ "MatFactorInfo_MUMPS"
266 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
267   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
268   PetscErrorCode ierr;
269 
270   PetscFunctionBegin;
271   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
272   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
273   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
274   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(1) (output for error):        %d \n",lu->id.ICNTL(1));CHKERRQ(ierr);
275   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(2) (output of diagnostic msg):%d \n",lu->id.ICNTL(2));CHKERRQ(ierr);
276   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(3) (output for global info):  %d \n",lu->id.ICNTL(3));CHKERRQ(ierr);
277   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
278   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
279   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
280   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
281   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(8) (scalling strategy):       %d \n",lu->id.ICNTL(8));CHKERRQ(ierr);
282   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
283   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
284   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
285   if (!lu->myid && lu->id.ICNTL(11)>0) {
286     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
287     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
288     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
289     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
290     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
291     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
292 
293   }
294   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
295   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
296   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
297   /* ICNTL(15-17) not used */
298   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
299   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(19) (Shur complement info):                       %d \n",lu->id.ICNTL(19));CHKERRQ(ierr);
300   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(20) (rhs sparse pattern):                         %d \n",lu->id.ICNTL(20));CHKERRQ(ierr);
301   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(21) (solution struct):                            %d \n",lu->id.ICNTL(21));CHKERRQ(ierr);
302 
303   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
304   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
305   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
306   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(4) (value of static pivoting):         %g \n",lu->id.CNTL(4));CHKERRQ(ierr);
307 
308   /* infomation local to each processor */
309   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);}
310   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
311   ierr = PetscSynchronizedFlush(A->comm);
312   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);}
313   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
314   ierr = PetscSynchronizedFlush(A->comm);
315   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);}
316   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
317   ierr = PetscSynchronizedFlush(A->comm);
318   /*
319   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(2) (info about error or warning ): \n");CHKERRQ(ierr);}
320   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(2));CHKERRQ(ierr);
321   ierr = PetscSynchronizedFlush(A->comm);
322   */
323 
324   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization): \n");CHKERRQ(ierr);}
325   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(15));CHKERRQ(ierr);
326   ierr = PetscSynchronizedFlush(A->comm);
327 
328   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization): \n");CHKERRQ(ierr);}
329   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(16));CHKERRQ(ierr);
330   ierr = PetscSynchronizedFlush(A->comm);
331 
332   if (!lu->myid) {ierr = PetscPrintf(PETSC_COMM_SELF, "      INFO(23) (num of pivots eliminated on this processor after factorization): \n");CHKERRQ(ierr);}
333   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %d \n",lu->myid,lu->id.INFO(23));CHKERRQ(ierr);
334   ierr = PetscSynchronizedFlush(A->comm);
335 
336   if (!lu->myid){ /* information from the host */
337     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
338     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
339     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
340 
341     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
342     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
343     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
344     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
345     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
346     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
347     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
348     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after factorization): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
349     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix after factorization): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
350     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
351     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
352     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
353     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
354     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
355     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
356     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
357     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
358      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
359      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d \n",lu->id.INFOG(21));CHKERRQ(ierr);
360      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d \n",lu->id.INFOG(22));CHKERRQ(ierr);
361      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d \n",lu->id.INFOG(23));CHKERRQ(ierr);
362      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d \n",lu->id.INFOG(24));CHKERRQ(ierr);
363      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(25) (after factorization: number of pivots modified by static pivoting): %d \n",lu->id.INFOG(25));CHKERRQ(ierr);
364   }
365 
366   PetscFunctionReturn(0);
367 }
368 
369 #undef __FUNCT__
370 #define __FUNCT__ "MatView_MUMPS"
371 PetscErrorCode MatView_MUMPS(Mat A,PetscViewer viewer) {
372   PetscErrorCode    ierr;
373   PetscTruth        iascii;
374   PetscViewerFormat format;
375   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
376 
377   PetscFunctionBegin;
378   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
379 
380   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
381   if (iascii) {
382     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
383     if (format == PETSC_VIEWER_ASCII_INFO){
384       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
385     }
386   }
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatSolve_AIJMUMPS"
392 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
393   Mat_MUMPS      *lu=(Mat_MUMPS*)A->spptr;
394   PetscScalar    *array;
395   Vec            x_seq;
396   IS             is_iden,is_petsc;
397   VecScatter     scat_rhs=lu->scat_rhs,scat_sol=lu->scat_sol;
398   PetscErrorCode ierr;
399   PetscInt       i;
400 
401   PetscFunctionBegin;
402   lu->id.nrhs = 1;
403   x_seq = lu->b_seq;
404   if (lu->size > 1){
405     /* MUMPS only supports centralized rhs. Scatter b into a seqential rhs vector */
406     ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
407     ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat_rhs);CHKERRQ(ierr);
408     if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
409   } else {  /* size == 1 */
410     ierr = VecCopy(b,x);CHKERRQ(ierr);
411     ierr = VecGetArray(x,&array);CHKERRQ(ierr);
412   }
413   if (!lu->myid) { /* define rhs on the host */
414 #if defined(PETSC_USE_COMPLEX)
415     lu->id.rhs = (mumps_double_complex*)array;
416 #else
417     lu->id.rhs = array;
418 #endif
419   }
420   if (lu->size == 1){
421     ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
422   } else if (!lu->myid){
423     ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
424   }
425 
426   if (lu->size > 1){
427     /* distributed solution */
428     lu->id.ICNTL(21) = 1;
429     if (!lu->nSolve){
430       /* Create x_seq=sol_loc for repeated use */
431       PetscInt    lsol_loc;
432       PetscScalar *sol_loc;
433       lsol_loc = lu->id.INFO(23); /* length of sol_loc */
434       ierr = PetscMalloc((1+lsol_loc)*(sizeof(PetscScalar)+sizeof(PetscInt)),&sol_loc);CHKERRQ(ierr);
435       lu->id.isol_loc = (PetscInt *)(sol_loc + lsol_loc);
436       lu->id.lsol_loc = lsol_loc;
437       lu->id.sol_loc  = (F_DOUBLE *)sol_loc;
438       ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,lsol_loc,sol_loc,&lu->x_seq);CHKERRQ(ierr);
439     }
440   }
441 
442   /* solve phase */
443   /*-------------*/
444   lu->id.job = 3;
445 #if defined(PETSC_USE_COMPLEX)
446   zmumps_c(&lu->id);
447 #else
448   dmumps_c(&lu->id);
449 #endif
450   if (lu->id.INFOG(1) < 0) {
451     SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
452   }
453 
454   if (lu->size > 1) { /* convert mumps distributed solution to petsc mpi x */
455     if (!lu->nSolve){ /* create scatter scat_sol */
456       ierr = ISCreateStride(PETSC_COMM_SELF,lu->id.lsol_loc,0,1,&is_iden);CHKERRQ(ierr); /* from */
457       for (i=0; i<lu->id.lsol_loc; i++){
458         lu->id.isol_loc[i] -= 1; /* change Fortran style to C style */
459       }
460       ierr = ISCreateGeneral(PETSC_COMM_SELF,lu->id.lsol_loc,lu->id.isol_loc,&is_petsc);CHKERRQ(ierr);  /* to */
461       ierr = VecScatterCreate(lu->x_seq,is_iden,x,is_petsc,&lu->scat_sol);CHKERRQ(ierr);
462       ierr = ISDestroy(is_iden);CHKERRQ(ierr);
463       ierr = ISDestroy(is_petsc);CHKERRQ(ierr);
464     }
465     ierr = VecScatterBegin(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
466     ierr = VecScatterEnd(lu->x_seq,x,INSERT_VALUES,SCATTER_FORWARD,lu->scat_sol);CHKERRQ(ierr);
467   }
468   lu->nSolve++;
469   PetscFunctionReturn(0);
470 }
471 
472 /*
473   input:
474    F:        numeric factor
475   output:
476    nneg:     total number of negative pivots
477    nzero:    0
478    npos:     (global dimension of F) - nneg
479 */
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
483 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
484 {
485   Mat_MUMPS      *lu =(Mat_MUMPS*)F->spptr;
486   PetscErrorCode ierr;
487   PetscMPIInt    size;
488 
489   PetscFunctionBegin;
490   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
491   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
492   if (size > 1 && lu->id.ICNTL(13) != 1){
493     SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13));
494   }
495   if (nneg){
496     if (!lu->myid){
497       *nneg = lu->id.INFOG(12);
498     }
499     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
500   }
501   if (nzero) *nzero = 0;
502   if (npos)  *npos  = F->rmap.N - (*nneg);
503   PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS"
508 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,MatFactorInfo *info,Mat *F)
509 {
510   Mat_MUMPS      *lu =(Mat_MUMPS*)(*F)->spptr;
511   Mat_MUMPS      *lua=(Mat_MUMPS*)(A)->spptr;
512   PetscErrorCode ierr;
513   PetscInt       rnz,nnz,nz=0,i,M=A->rmap.N,*ai,*aj,icntl;
514   PetscTruth     valOnly,flg;
515   Mat            F_diag;
516 
517   PetscFunctionBegin;
518   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
519     (*F)->ops->solve    = MatSolve_AIJMUMPS;
520 
521     /* Initialize a MUMPS instance */
522     ierr = MPI_Comm_rank(A->comm, &lu->myid);
523     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
524     lua->myid = lu->myid; lua->size = lu->size;
525     lu->id.job = JOB_INIT;
526     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
527     ierr = MPICCommToFortranComm(lu->comm_mumps,&(lu->id.comm_fortran));CHKERRQ(ierr);
528 
529     /* Set mumps options */
530     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
531     lu->id.par=1;  /* host participates factorizaton and solve */
532     lu->id.sym=lu->sym;
533     if (lu->sym == 2){
534       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
535       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
536     }
537 #if defined(PETSC_USE_COMPLEX)
538     zmumps_c(&lu->id);
539 #else
540     dmumps_c(&lu->id);
541 #endif
542 
543     if (lu->size == 1){
544       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
545     } else {
546       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
547     }
548 
549     icntl=-1;
550     lu->id.ICNTL(4) = 0;  /* level of printing; overwrite mumps default ICNTL(4)=2 */
551     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
552     if ((flg && icntl > 0) || PetscLogPrintInfo) {
553       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
554     } else { /* no output */
555       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
556       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
557       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
558     }
559     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
560     icntl=-1;
561     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
562     if (flg) {
563       if (icntl== 1){
564         SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
565       } else {
566         lu->id.ICNTL(7) = icntl;
567       }
568     }
569     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
570     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
571     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
572     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
573     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
574     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
575     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
576 
577     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
578     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
579     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
580     ierr = PetscOptionsReal("-mat_mumps_cntl_4","CNTL(4): value for static pivoting","None",lu->id.CNTL(4),&lu->id.CNTL(4),PETSC_NULL);CHKERRQ(ierr);
581     PetscOptionsEnd();
582   }
583 
584   /* define matrix A */
585   switch (lu->id.ICNTL(18)){
586   case 0:  /* centralized assembled matrix input (size=1) */
587     if (!lu->myid) {
588       if (lua->isAIJ){
589         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
590         nz               = aa->nz;
591         ai = aa->i; aj = aa->j; lu->val = aa->a;
592       } else {
593         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
594         nz                  =  aa->nz;
595         ai = aa->i; aj = aa->j; lu->val = aa->a;
596       }
597       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
598         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->irn);CHKERRQ(ierr);
599         ierr = PetscMalloc(nz*sizeof(PetscInt),&lu->jcn);CHKERRQ(ierr);
600         nz = 0;
601         for (i=0; i<M; i++){
602           rnz = ai[i+1] - ai[i];
603           while (rnz--) {  /* Fortran row/col index! */
604             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
605           }
606         }
607       }
608     }
609     break;
610   case 3:  /* distributed assembled matrix input (size>1) */
611     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
612       valOnly = PETSC_FALSE;
613     } else {
614       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
615     }
616     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
617     break;
618   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
619   }
620 
621   /* analysis phase */
622   /*----------------*/
623   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
624     lu->id.job = 1;
625 
626     lu->id.n = M;
627     switch (lu->id.ICNTL(18)){
628     case 0:  /* centralized assembled matrix input */
629       if (!lu->myid) {
630         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
631         if (lu->id.ICNTL(6)>1){
632 #if defined(PETSC_USE_COMPLEX)
633           lu->id.a = (mumps_double_complex*)lu->val;
634 #else
635           lu->id.a = lu->val;
636 #endif
637         }
638       }
639       break;
640     case 3:  /* distributed assembled matrix input (size>1) */
641       lu->id.nz_loc = nnz;
642       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
643       if (lu->id.ICNTL(6)>1) {
644 #if defined(PETSC_USE_COMPLEX)
645         lu->id.a_loc = (mumps_double_complex*)lu->val;
646 #else
647         lu->id.a_loc = lu->val;
648 #endif
649       }
650       /* MUMPS only supports centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */
651       IS  is_iden;
652       Vec b;
653       if (!lu->myid){
654         ierr = VecCreateSeq(PETSC_COMM_SELF,A->cmap.N,&lu->b_seq);CHKERRQ(ierr);
655         ierr = ISCreateStride(PETSC_COMM_SELF,A->cmap.N,0,1,&is_iden);CHKERRQ(ierr);
656       } else {
657         ierr = VecCreateSeq(PETSC_COMM_SELF,0,&lu->b_seq);CHKERRQ(ierr);
658         ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&is_iden);CHKERRQ(ierr);
659       }
660       ierr = VecCreate(A->comm,&b);CHKERRQ(ierr);
661       ierr = VecSetSizes(b,A->rmap.n,PETSC_DECIDE);CHKERRQ(ierr);
662       ierr = VecSetFromOptions(b);CHKERRQ(ierr);
663 
664       ierr = VecScatterCreate(b,is_iden,lu->b_seq,is_iden,&lu->scat_rhs);CHKERRQ(ierr);
665       ierr = ISDestroy(is_iden);CHKERRQ(ierr);
666       ierr = VecDestroy(b);CHKERRQ(ierr);
667       break;
668     }
669 #if defined(PETSC_USE_COMPLEX)
670     zmumps_c(&lu->id);
671 #else
672     dmumps_c(&lu->id);
673 #endif
674     if (lu->id.INFOG(1) < 0) {
675       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
676     }
677   }
678 
679   /* numerical factorization phase */
680   /*-------------------------------*/
681   lu->id.job = 2;
682   if(!lu->id.ICNTL(18)) {
683     if (!lu->myid) {
684 #if defined(PETSC_USE_COMPLEX)
685       lu->id.a = (mumps_double_complex*)lu->val;
686 #else
687       lu->id.a = lu->val;
688 #endif
689     }
690   } else {
691 #if defined(PETSC_USE_COMPLEX)
692     lu->id.a_loc = (mumps_double_complex*)lu->val;
693 #else
694     lu->id.a_loc = lu->val;
695 #endif
696   }
697 #if defined(PETSC_USE_COMPLEX)
698   zmumps_c(&lu->id);
699 #else
700   dmumps_c(&lu->id);
701 #endif
702   if (lu->id.INFOG(1) < 0) {
703     if (lu->id.INFO(1) == -13) {
704       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
705     } else {
706       SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
707     }
708   }
709 
710   if (!lu->myid && lu->id.ICNTL(16) > 0){
711     SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
712   }
713 
714   if (lu->size > 1){
715     if ((*F)->factor == FACTOR_LU){
716       F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
717     } else {
718       F_diag = ((Mat_MPISBAIJ *)(*F)->data)->A;
719     }
720     F_diag->assembled = PETSC_TRUE;
721     if (lu->nSolve){
722       ierr = VecScatterDestroy(lu->scat_sol);CHKERRQ(ierr);
723       ierr = PetscFree(lu->id.sol_loc);CHKERRQ(ierr);
724       ierr = VecDestroy(lu->x_seq);CHKERRQ(ierr);
725     }
726   }
727   (*F)->assembled   = PETSC_TRUE;
728   lu->matstruc      = SAME_NONZERO_PATTERN;
729   lu->CleanUpMUMPS  = PETSC_TRUE;
730   lu->nSolve        = 0;
731   PetscFunctionReturn(0);
732 }
733 
734 /* Note the Petsc r and c permutations are ignored */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
737 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
738   Mat            B;
739   Mat_MUMPS      *lu;
740   PetscErrorCode ierr;
741 
742   PetscFunctionBegin;
743   /* Create the factorization matrix */
744   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
745   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
746   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
747   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
748   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
749 
750   B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
751   B->factor               = FACTOR_LU;
752   lu                      = (Mat_MUMPS*)B->spptr;
753   lu->sym                 = 0;
754   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
755 
756   *F = B;
757   PetscFunctionReturn(0);
758 }
759 
760 /* Note the Petsc r permutation is ignored */
761 #undef __FUNCT__
762 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
763 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
764   Mat            B;
765   Mat_MUMPS      *lu;
766   PetscErrorCode ierr;
767 
768   PetscFunctionBegin;
769   /* Create the factorization matrix */
770   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
771   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,A->rmap.N,A->cmap.N);CHKERRQ(ierr);
772   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
773   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
774   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
775 
776   B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
777   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
778   B->factor                     = FACTOR_CHOLESKY;
779   lu                            = (Mat_MUMPS*)B->spptr;
780   lu->sym                       = 2;
781   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
782 
783   *F = B;
784   PetscFunctionReturn(0);
785 }
786 
787 #undef __FUNCT__
788 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
789 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
790   PetscErrorCode ierr;
791   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
792 
793   PetscFunctionBegin;
794   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
795 
796   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
797   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
798   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
799   PetscFunctionReturn(0);
800 }
801 
802 EXTERN_C_BEGIN
803 #undef __FUNCT__
804 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
805 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_AIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
806 {
807   PetscErrorCode ierr;
808   PetscMPIInt    size;
809   MPI_Comm       comm;
810   Mat            B=*newmat;
811   Mat_MUMPS      *mumps;
812 
813   PetscFunctionBegin;
814   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
815   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
816 
817   if (reuse == MAT_INITIAL_MATRIX) {
818     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
819     /* A may have special container that is not duplicated,
820        e.g., A is obtainted from MatMatMult(,&A). Save B->ops instead */
821     mumps->MatDuplicate              = B->ops->duplicate;
822     mumps->MatView                   = B->ops->view;
823     mumps->MatAssemblyEnd            = B->ops->assemblyend;
824     mumps->MatLUFactorSymbolic       = B->ops->lufactorsymbolic;
825     mumps->MatCholeskyFactorSymbolic = B->ops->choleskyfactorsymbolic;
826     mumps->MatDestroy                = B->ops->destroy;
827   } else {
828     mumps->MatDuplicate              = A->ops->duplicate;
829     mumps->MatView                   = A->ops->view;
830     mumps->MatAssemblyEnd            = A->ops->assemblyend;
831     mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
832     mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
833     mumps->MatDestroy                = A->ops->destroy;
834   }
835   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
836   mumps->CleanUpMUMPS              = PETSC_FALSE;
837   mumps->isAIJ                     = PETSC_TRUE;
838 
839   B->spptr                         = (void*)mumps;
840   B->ops->duplicate                = MatDuplicate_MUMPS;
841   B->ops->view                     = MatView_MUMPS;
842   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
843   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
844   B->ops->destroy                  = MatDestroy_MUMPS;
845 
846   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
847   if (size == 1) {
848     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
849                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
850     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
851                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
852   } else {
853     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
854                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
855     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
856                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
857   }
858 
859   ierr = PetscInfo(0,"Using MUMPS for LU factorization and solves.\n");CHKERRQ(ierr);
860   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
861   *newmat = B;
862   PetscFunctionReturn(0);
863 }
864 EXTERN_C_END
865 
866 /*MC
867   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
868   and sequential matrices via the external package MUMPS.
869 
870   If MUMPS is installed (see the manual for instructions
871   on how to declare the existence of external packages),
872   a matrix type can be constructed which invokes MUMPS solvers.
873   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
874 
875   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
876   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
877   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
878   for communicators controlling multiple processes.  It is recommended that you call both of
879   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
880   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
881   without data copy.
882 
883   Options Database Keys:
884 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
885 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
886 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
887 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
888 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
889 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
890 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
891 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
892 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
893 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
894 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
895 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
896 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
897 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
. -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
- -mat_mumps_cntl_4 <val> - value for static pivoting
899 
900   Level: beginner
901 
902 .seealso: MATSBAIJMUMPS
903 M*/
904 
905 EXTERN_C_BEGIN
906 #undef __FUNCT__
907 #define __FUNCT__ "MatCreate_AIJMUMPS"
908 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_AIJMUMPS(Mat A)
909 {
910   PetscErrorCode ierr;
911   PetscMPIInt    size;
912 
913   PetscFunctionBegin;
914   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
915   if (size == 1) {
916     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
917   } else {
918     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
919     /*
920     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
921     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
922     */
923   }
924   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
925   PetscFunctionReturn(0);
926 }
927 EXTERN_C_END
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
931 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
932 {
933   PetscErrorCode ierr;
934   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
935 
936   PetscFunctionBegin;
937   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
938   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
939   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
940   PetscFunctionReturn(0);
941 }
942 
943 EXTERN_C_BEGIN
944 #undef __FUNCT__
945 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
946 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
947 {
948   Mat       A;
949   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
950   PetscErrorCode ierr;
951 
952   PetscFunctionBegin;
953   /*
954     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
955     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
956     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
957     block size info so that PETSc can determine the local size properly.  The block size info is set
958     in the preallocation routine.
959   */
960   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
961   A    = ((Mat_MPISBAIJ *)B->data)->A;
962   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 EXTERN_C_END
966 
967 EXTERN_C_BEGIN
968 #undef __FUNCT__
969 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
970 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SBAIJ_SBAIJMUMPS(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
971 {
972   PetscErrorCode ierr;
973   PetscMPIInt    size;
974   MPI_Comm       comm;
975   Mat            B=*newmat;
976   Mat_MUMPS      *mumps;
977   void           (*f)(void);
978 
979   PetscFunctionBegin;
980   if (reuse == MAT_INITIAL_MATRIX) {
981     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
982   }
983 
984   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
985   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
986 
987   mumps->MatDuplicate              = A->ops->duplicate;
988   mumps->MatView                   = A->ops->view;
989   mumps->MatAssemblyEnd            = A->ops->assemblyend;
990   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
991   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
992   mumps->MatDestroy                = A->ops->destroy;
993   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
994   mumps->CleanUpMUMPS              = PETSC_FALSE;
995   mumps->isAIJ                     = PETSC_FALSE;
996 
997   B->spptr                         = (void*)mumps;
998   B->ops->duplicate                = MatDuplicate_MUMPS;
999   B->ops->view                     = MatView_MUMPS;
1000   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
1001   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
1002   B->ops->destroy                  = MatDestroy_MUMPS;
1003 
1004   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
1005   if (size == 1) {
1006     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
1007                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1008     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
1009                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1010   } else {
1011   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
1012     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(PetscVoidStarFunction)&f);CHKERRQ(ierr);
1013     if (f) { /* This case should always be true when this routine is called */
1014       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
1015       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1016                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
1017                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
1018     }
1019     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
1020                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
1021     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
1022                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
1023   }
1024 
1025   ierr = PetscInfo(0,"Using MUMPS for Cholesky factorization and solves.\n");CHKERRQ(ierr);
1026   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
1027   *newmat = B;
1028   PetscFunctionReturn(0);
1029 }
1030 EXTERN_C_END
1031 
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatDuplicate_MUMPS"
1034 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
1035   PetscErrorCode ierr;
1036   Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;
1037 
1038   PetscFunctionBegin;
1039   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
1040   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
1041   PetscFunctionReturn(0);
1042 }
1043 
1044 /*MC
1045   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
1046   distributed and sequential matrices via the external package MUMPS.
1047 
1048   If MUMPS is installed (see the manual for instructions
1049   on how to declare the existence of external packages),
1050   a matrix type can be constructed which invokes MUMPS solvers.
1051   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
1052 
1053   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
1054   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
1055   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
1056   for communicators controlling multiple processes.  It is recommended that you call both of
1057   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
1058   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
1059   without data copy.
1060 
1061   Options Database Keys:
1062 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
1063 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
1064 . -mat_mumps_icntl_4 <0,...,4> - print level
1065 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
1066 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
1067 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
1068 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
1069 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
1070 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
1071 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
1072 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
1073 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
1074 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
1075 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
. -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
- -mat_mumps_cntl_4 <val> - value for static pivoting
1077 
1078   Level: beginner
1079 
1080 .seealso: MATAIJMUMPS
1081 M*/
1082 
1083 EXTERN_C_BEGIN
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
1086 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJMUMPS(Mat A)
1087 {
1088   PetscErrorCode ierr;
1089   PetscMPIInt    size;
1090 
1091   PetscFunctionBegin;
1092   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
1093   if (size == 1) {
1094     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1095   } else {
1096     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1097   }
1098   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 EXTERN_C_END
1102