xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42)
1 
2 /*
3     Provides an interface to the MUMPS_4.3.1 sparse solver
4 */
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/mat/impls/sbaij/seq/sbaij.h"
8 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #include "zmumps_c.h"
13 #else
14 #include "dmumps_c.h"
15 #endif
16 EXTERN_C_END
17 #define JOB_INIT -1
18 #define JOB_END -2
19 /* macros s.t. indices match MUMPS documentation */
20 #define ICNTL(I) icntl[(I)-1]
21 #define CNTL(I) cntl[(I)-1]
22 #define INFOG(I) infog[(I)-1]
23 #define INFO(I) info[(I)-1]
24 #define RINFOG(I) rinfog[(I)-1]
25 #define RINFO(I) rinfo[(I)-1]
26 
27 typedef struct {
28 #if defined(PETSC_USE_COMPLEX)
29   ZMUMPS_STRUC_C id;
30 #else
31   DMUMPS_STRUC_C id;
32 #endif
33   MatStructure   matstruc;
34   int            myid,size,*irn,*jcn,sym;
35   PetscScalar    *val;
36   MPI_Comm       comm_mumps;
37 
38   PetscTruth     isAIJ,CleanUpMUMPS;
39   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
40   int (*MatView)(Mat,PetscViewer);
41   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
42   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
43   int (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
44   int (*MatDestroy)(Mat);
45   int (*specialdestroy)(Mat);
46   int (*MatPreallocate)(Mat,int,int,int*,int,int*);
47 } Mat_MUMPS;
48 
49 EXTERN int MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
50 EXTERN_C_BEGIN
51 int MatConvert_SBAIJ_SBAIJMUMPS(Mat,const MatType,Mat*);
52 EXTERN_C_END
53 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
54 /*
55   input:
56     A       - matrix in mpiaij or mpisbaij (bs=1) format
57     shift   - 0: C style output triple; 1: Fortran style output triple.
58     valOnly - FALSE: spaces are allocated and values are set for the triple
59               TRUE:  only the values in v array are updated
60   output:
61     nnz     - dim of r, c, and v (number of local nonzero entries of A)
62     r, c, v - row and col index, matrix values (matrix triples)
63  */
64 int MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
65   int         *ai, *aj, *bi, *bj, rstart,nz, *garray;
66   int         ierr,i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol;
67   int         *row,*col;
68   PetscScalar *av, *bv,*val;
69   Mat_MUMPS   *mumps=(Mat_MUMPS*)A->spptr;
70 
71   PetscFunctionBegin;
72   if (mumps->isAIJ){
73     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
74     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
75     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
76     nz = aa->nz + bb->nz;
77     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
78     garray = mat->garray;
79     av=aa->a; bv=bb->a;
80 
81   } else {
82     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
83     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
84     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
85     if (mat->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", mat->bs);
86     nz = aa->nz + bb->nz;
87     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
88     garray = mat->garray;
89     av=aa->a; bv=bb->a;
90   }
91 
92   if (!valOnly){
93     ierr = PetscMalloc(nz*sizeof(int),&row);CHKERRQ(ierr);
94     ierr = PetscMalloc(nz*sizeof(int),&col);CHKERRQ(ierr);
95     ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
96     *r = row; *c = col; *v = val;
97   } else {
98     row = *r; col = *c; val = *v;
99   }
100   *nnz = nz;
101 
102   jj = 0; irow = rstart;
103   for ( i=0; i<m; i++ ) {
104     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
105     countA = ai[i+1] - ai[i];
106     countB = bi[i+1] - bi[i];
107     bjj = bj + bi[i];
108 
109     /* get jB, the starting local col index for the 2nd B-part */
110     colA_start = rstart + ajj[0]; /* the smallest col index for A */
111     j=-1;
112     do {
113       j++;
114       if (j == countB) break;
115       jcol = garray[bjj[j]];
116     } while (jcol < colA_start);
117     jB = j;
118 
119     /* B-part, smaller col index */
120     colA_start = rstart + ajj[0]; /* the smallest col index for A */
121     for (j=0; j<jB; j++){
122       jcol = garray[bjj[j]];
123       if (!valOnly){
124         row[jj] = irow + shift; col[jj] = jcol + shift;
125 
126       }
127       val[jj++] = *bv++;
128     }
129     /* A-part */
130     for (j=0; j<countA; j++){
131       if (!valOnly){
132         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
133       }
134       val[jj++] = *av++;
135     }
136     /* B-part, larger col index */
137     for (j=jB; j<countB; j++){
138       if (!valOnly){
139         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
140       }
141       val[jj++] = *bv++;
142     }
143     irow++;
144   }
145 
146   PetscFunctionReturn(0);
147 }
148 
149 EXTERN_C_BEGIN
150 #undef __FUNCT__
151 #define __FUNCT__ "MatConvert_MUMPS_Base"
152 int MatConvert_MUMPS_Base(Mat A,const MatType type,Mat *newmat) {
153   int       ierr;
154   Mat       B=*newmat;
155   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
156 
157   PetscFunctionBegin;
158   if (B != A) {
159     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
160   }
161   B->ops->duplicate              = mumps->MatDuplicate;
162   B->ops->view                   = mumps->MatView;
163   B->ops->assemblyend            = mumps->MatAssemblyEnd;
164   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
165   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
166   B->ops->destroy                = mumps->MatDestroy;
167   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
168   ierr = PetscFree(mumps);CHKERRQ(ierr);
169   *newmat = B;
170   PetscFunctionReturn(0);
171 }
172 EXTERN_C_END
173 
174 #undef __FUNCT__
175 #define __FUNCT__ "MatDestroy_MUMPS"
176 int MatDestroy_MUMPS(Mat A) {
177   Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
178   int       ierr,size=lu->size;
179   int       (*specialdestroy)(Mat);
180   PetscFunctionBegin;
181   if (lu->CleanUpMUMPS) {
182     /* Terminate instance, deallocate memories */
183     lu->id.job=JOB_END;
184 #if defined(PETSC_USE_COMPLEX)
185     zmumps_c(&lu->id);
186 #else
187     dmumps_c(&lu->id);
188 #endif
189     if (lu->irn) {
190       ierr = PetscFree(lu->irn);CHKERRQ(ierr);
191     }
192     if (lu->jcn) {
193       ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
194     }
195     if (size>1 && lu->val) {
196       ierr = PetscFree(lu->val);CHKERRQ(ierr);
197     }
198     ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
199   }
200   specialdestroy = lu->specialdestroy;
201   ierr = (*specialdestroy)(A);CHKERRQ(ierr);
202   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
203   PetscFunctionReturn(0);
204 }
205 
206 #undef __FUNCT__
207 #define __FUNCT__ "MatDestroy_AIJMUMPS"
208 int MatDestroy_AIJMUMPS(Mat A) {
209   int ierr, size;
210 
211   PetscFunctionBegin;
212   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
213   if (size==1) {
214     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
215   } else {
216     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
217   }
218   PetscFunctionReturn(0);
219 }
220 
221 #undef __FUNCT__
222 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
223 int MatDestroy_SBAIJMUMPS(Mat A) {
224   int ierr, size;
225 
226   PetscFunctionBegin;
227   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
228   if (size==1) {
229     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,&A);CHKERRQ(ierr);
230   } else {
231     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,&A);CHKERRQ(ierr);
232   }
233   PetscFunctionReturn(0);
234 }
235 
236 #undef __FUNCT__
237 #define __FUNCT__ "MatFactorInfo_MUMPS"
238 int MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
239   Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
240   int       ierr;
241 
242   PetscFunctionBegin;
243   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
244   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
245   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
246   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
247   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
248   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
249   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
250   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
251   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
252   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
253   if (!lu->myid && lu->id.ICNTL(11)>0) {
254     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
255     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
256     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
257     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
258     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
259     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
260 
261   }
262   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
263   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
264   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
265   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (efficiency control):                         %d \n",lu->id.ICNTL(15));CHKERRQ(ierr);
266   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
267 
268   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
269   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
270   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
271 
272   /* infomation local to each processor */
273   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
274   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
275   ierr = PetscSynchronizedFlush(A->comm);
276   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
277   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
278   ierr = PetscSynchronizedFlush(A->comm);
279   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
280   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
281   ierr = PetscSynchronizedFlush(A->comm);
282 
283   if (!lu->myid){ /* information from the host */
284     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
285     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
286     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
287 
288     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
289     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
290     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
291     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
292     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
293     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
294     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
295     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
296     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
297     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
298     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
299     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
300     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
301     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
302     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
303     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
304     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
305      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
306   }
307 
308   PetscFunctionReturn(0);
309 }
310 
311 #undef __FUNCT__
312 #define __FUNCT__ "MatView_AIJMUMPS"
313 int MatView_AIJMUMPS(Mat A,PetscViewer viewer) {
314   int               ierr;
315   PetscTruth        iascii;
316   PetscViewerFormat format;
317   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
318 
319   PetscFunctionBegin;
320   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
321 
322   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
323   if (iascii) {
324     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
325     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
326       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
327     }
328   }
329   PetscFunctionReturn(0);
330 }
331 
332 #undef __FUNCT__
333 #define __FUNCT__ "MatSolve_AIJMUMPS"
334 int MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
335   Mat_MUMPS   *lu=(Mat_MUMPS*)A->spptr;
336   PetscScalar *array;
337   Vec         x_seq;
338   IS          iden;
339   VecScatter  scat;
340   int         ierr;
341 
342   PetscFunctionBegin;
343   if (lu->size > 1){
344     if (!lu->myid){
345       ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr);
346       ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr);
347     } else {
348       ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr);
349       ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr);
350     }
351     ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr);
352     ierr = ISDestroy(iden);CHKERRQ(ierr);
353 
354     ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
355     ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
356     if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
357   } else {  /* size == 1 */
358     ierr = VecCopy(b,x);CHKERRQ(ierr);
359     ierr = VecGetArray(x,&array);CHKERRQ(ierr);
360   }
361   if (!lu->myid) { /* define rhs on the host */
362 #if defined(PETSC_USE_COMPLEX)
363     lu->id.rhs = (mumps_double_complex*)array;
364 #else
365     lu->id.rhs = array;
366 #endif
367   }
368 
369   /* solve phase */
370   lu->id.job=3;
371 #if defined(PETSC_USE_COMPLEX)
372   zmumps_c(&lu->id);
373 #else
374   dmumps_c(&lu->id);
375 #endif
376   if (lu->id.INFOG(1) < 0) {
377     SETERRQ1(1,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
378   }
379 
380   /* convert mumps solution x_seq to petsc mpi x */
381   if (lu->size > 1) {
382     if (!lu->myid){
383       ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
384     }
385     ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
386     ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
387     ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
388     ierr = VecDestroy(x_seq);CHKERRQ(ierr);
389   } else {
390     ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
391   }
392 
393   PetscFunctionReturn(0);
394 }
395 
396 /*
397   input:
398    F:        numeric factor
399   output:
400    nneg:     total number of negative pivots
401    nzero:    0
402    npos:     (global dimension of F) - nneg
403 */
404 
405 #undef __FUNCT__
406 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
407 int MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
408 {
409   Mat_MUMPS  *lu =(Mat_MUMPS*)F->spptr;
410   int        ierr,size;
411 
412   PetscFunctionBegin;
413   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
414   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
415   if (size > 1 && lu->id.ICNTL(13) != 1){
416     SETERRQ1(1,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13));
417   }
418   if (nneg){
419     if (!lu->myid){
420       *nneg = lu->id.INFOG(12);
421     }
422     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
423   }
424   if (nzero) *nzero = 0;
425   if (npos)  *npos  = F->M - (*nneg);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatFactorNumeric_MPIAIJMUMPS"
431 int MatFactorNumeric_AIJMUMPS(Mat A,Mat *F) {
432   Mat_MUMPS  *lu =(Mat_MUMPS*)(*F)->spptr;
433   Mat_MUMPS  *lua=(Mat_MUMPS*)(A)->spptr;
434   int        rnz,nnz,ierr,nz,i,M=A->M,*ai,*aj,icntl;
435   PetscTruth valOnly,flg;
436 
437   PetscFunctionBegin;
438   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
439     (*F)->ops->solve    = MatSolve_AIJMUMPS;
440 
441     /* Initialize a MUMPS instance */
442     ierr = MPI_Comm_rank(A->comm, &lu->myid);
443     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
444     lua->myid = lu->myid; lua->size = lu->size;
445     lu->id.job = JOB_INIT;
446     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
447     lu->id.comm_fortran = lu->comm_mumps;
448 
449     /* Set mumps options */
450     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
451     lu->id.par=1;  /* host participates factorizaton and solve */
452     lu->id.sym=lu->sym;
453     if (lu->sym == 2){
454       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
455       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
456     }
457 #if defined(PETSC_USE_COMPLEX)
458   zmumps_c(&lu->id);
459 #else
460   dmumps_c(&lu->id);
461 #endif
462 
463     if (lu->size == 1){
464       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
465     } else {
466       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
467     }
468 
469     icntl=-1;
470     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
471     if (flg && icntl > 0) {
472       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
473     } else { /* no output */
474       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
475       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
476       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
477       lu->id.ICNTL(4) = 0;  /* level of printing, 0,1,2,3,4, default=2 */
478     }
479     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
480     icntl=-1;
481     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
482     if (flg) {
483       if (icntl== 1){
484         SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
485       } else {
486         lu->id.ICNTL(7) = icntl;
487       }
488     }
489     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
490     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
491     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
492     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
493     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
494     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
495     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
496 
497     /*
498     ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr);
499     if (flg){
500       if (icntl >-1 && icntl <3 ){
501         if (lu->myid==0) lu->id.ICNTL(16) = icntl;
502       } else {
503         SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl);
504       }
505     }
506     */
507 
508     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
509     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
510     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
511     PetscOptionsEnd();
512   }
513 
514   /* define matrix A */
515   switch (lu->id.ICNTL(18)){
516   case 0:  /* centralized assembled matrix input (size=1) */
517     if (!lu->myid) {
518       if (lua->isAIJ){
519         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
520         nz               = aa->nz;
521         ai = aa->i; aj = aa->j; lu->val = aa->a;
522       } else {
523         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
524         nz                  =  aa->nz;
525         ai = aa->i; aj = aa->j; lu->val = aa->a;
526       }
527       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
528         ierr = PetscMalloc(nz*sizeof(int),&lu->irn);CHKERRQ(ierr);
529         ierr = PetscMalloc(nz*sizeof(int),&lu->jcn);CHKERRQ(ierr);
530         nz = 0;
531         for (i=0; i<M; i++){
532           rnz = ai[i+1] - ai[i];
533           while (rnz--) {  /* Fortran row/col index! */
534             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
535           }
536         }
537       }
538     }
539     break;
540   case 3:  /* distributed assembled matrix input (size>1) */
541     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
542       valOnly = PETSC_FALSE;
543     } else {
544       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
545     }
546     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
547     break;
548   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
549   }
550 
551   /* analysis phase */
552   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
553      lu->id.n = M;
554     switch (lu->id.ICNTL(18)){
555     case 0:  /* centralized assembled matrix input */
556       if (!lu->myid) {
557         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
558         if (lu->id.ICNTL(6)>1){
559 #if defined(PETSC_USE_COMPLEX)
560           lu->id.a = (mumps_double_complex*)lu->val;
561 #else
562           lu->id.a = lu->val;
563 #endif
564         }
565       }
566       break;
567     case 3:  /* distributed assembled matrix input (size>1) */
568       lu->id.nz_loc = nnz;
569       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
570       if (lu->id.ICNTL(6)>1) {
571 #if defined(PETSC_USE_COMPLEX)
572         lu->id.a_loc = (mumps_double_complex*)lu->val;
573 #else
574         lu->id.a_loc = lu->val;
575 #endif
576       }
577       break;
578     }
579     lu->id.job=1;
580 #if defined(PETSC_USE_COMPLEX)
581   zmumps_c(&lu->id);
582 #else
583   dmumps_c(&lu->id);
584 #endif
585     if (lu->id.INFOG(1) < 0) {
586       SETERRQ1(1,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
587     }
588   }
589 
590   /* numerical factorization phase */
591   if(!lu->id.ICNTL(18)) {
592     if (!lu->myid) {
593 #if defined(PETSC_USE_COMPLEX)
594       lu->id.a = (mumps_double_complex*)lu->val;
595 #else
596       lu->id.a = lu->val;
597 #endif
598     }
599   } else {
600 #if defined(PETSC_USE_COMPLEX)
601     lu->id.a_loc = (mumps_double_complex*)lu->val;
602 #else
603     lu->id.a_loc = lu->val;
604 #endif
605   }
606   lu->id.job=2;
607 #if defined(PETSC_USE_COMPLEX)
608   zmumps_c(&lu->id);
609 #else
610   dmumps_c(&lu->id);
611 #endif
612   if (lu->id.INFOG(1) < 0) {
613     SETERRQ2(1,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
614   }
615 
616   if (lu->myid==0 && lu->id.ICNTL(16) > 0){
617     SETERRQ1(1,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
618   }
619 
620   (*F)->assembled  = PETSC_TRUE;
621   lu->matstruc     = SAME_NONZERO_PATTERN;
622   lu->CleanUpMUMPS = PETSC_TRUE;
623   PetscFunctionReturn(0);
624 }
625 
626 /* Note the Petsc r and c permutations are ignored */
627 #undef __FUNCT__
628 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
629 int MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
630   Mat       B;
631   Mat_MUMPS *lu;
632   int       ierr;
633 
634   PetscFunctionBegin;
635 
636   /* Create the factorization matrix */
637   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr);
638   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
639   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
640   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
641 
642   B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
643   B->factor               = FACTOR_LU;
644   lu                      = (Mat_MUMPS*)B->spptr;
645   lu->sym                 = 0;
646   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
647 
648   *F = B;
649   PetscFunctionReturn(0);
650 }
651 
652 /* Note the Petsc r permutation is ignored */
653 #undef __FUNCT__
654 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
655 int MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
656   Mat       B;
657   Mat_MUMPS *lu;
658   int       ierr;
659 
660   PetscFunctionBegin;
661 
662   /* Create the factorization matrix */
663   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr);
664   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
665   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
666   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
667 
668   B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
669   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
670   B->factor                     = FACTOR_CHOLESKY;
671   lu                            = (Mat_MUMPS*)B->spptr;
672   lu->sym                       = 2;
673   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
674 
675   *F = B;
676   PetscFunctionReturn(0);
677 }
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
681 int MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
682   int       ierr;
683   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
684 
685   PetscFunctionBegin;
686   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
687 
688   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
689   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
690   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
691   PetscFunctionReturn(0);
692 }
693 
694 EXTERN_C_BEGIN
695 #undef __FUNCT__
696 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
697 int MatConvert_AIJ_AIJMUMPS(Mat A,const MatType newtype,Mat *newmat) {
698   int       ierr,size;
699   MPI_Comm  comm;
700   Mat       B=*newmat;
701   Mat_MUMPS *mumps;
702 
703   PetscFunctionBegin;
704   if (B != A) {
705     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
706   }
707 
708   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
709   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
710 
711   mumps->MatDuplicate              = A->ops->duplicate;
712   mumps->MatView                   = A->ops->view;
713   mumps->MatAssemblyEnd            = A->ops->assemblyend;
714   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
715   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
716   mumps->MatDestroy                = A->ops->destroy;
717   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
718   mumps->CleanUpMUMPS              = PETSC_FALSE;
719   mumps->isAIJ                     = PETSC_TRUE;
720 
721   B->spptr                         = (void*)mumps;
722   B->ops->duplicate                = MatDuplicate_MUMPS;
723   B->ops->view                     = MatView_AIJMUMPS;
724   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
725   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
726   B->ops->destroy                  = MatDestroy_MUMPS;
727 
728   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
729   if (size == 1) {
730     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
731                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
732     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
733                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
734   } else {
735     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
736                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
737     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
738                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
739   }
740 
741   PetscLogInfo(0,"Using MUMPS for LU factorization and solves.");
742   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
743   *newmat = B;
744   PetscFunctionReturn(0);
745 }
746 EXTERN_C_END
747 
748 /*MC
749   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
750   and sequential matrices via the external package MUMPS.
751 
752   If MUMPS is installed (see the manual for instructions
753   on how to declare the existence of external packages),
754   a matrix type can be constructed which invokes MUMPS solvers.
755   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
756   This matrix type is only supported for double precision real.
757 
758   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
759   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
760   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
761   for communicators controlling multiple processes.  It is recommended that you call both of
762   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
763   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
764   without data copy.
765 
766   Options Database Keys:
767 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
768 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
769 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
770 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
771 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
772 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
773 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
774 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
775 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
776 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
777 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
778 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
779 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
780 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
781 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
782 
783   Level: beginner
784 
785 .seealso: MATSBAIJMUMPS
786 M*/
787 
788 EXTERN_C_BEGIN
789 #undef __FUNCT__
790 #define __FUNCT__ "MatCreate_AIJMUMPS"
791 int MatCreate_AIJMUMPS(Mat A) {
792   int      ierr,size;
793   Mat      A_diag;
794   MPI_Comm comm;
795 
796   PetscFunctionBegin;
797   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
798   /*   and AIJMUMPS types */
799   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr);
800   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
801   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
802   if (size == 1) {
803     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
804   } else {
805     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
806     A_diag = ((Mat_MPIAIJ *)A->data)->A;
807     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,&A_diag);CHKERRQ(ierr);
808   }
809   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,&A);CHKERRQ(ierr);
810   PetscFunctionReturn(0);
811 }
812 EXTERN_C_END
813 
814 #undef __FUNCT__
815 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
816 int MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) {
817   int       ierr;
818   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
819 
820   PetscFunctionBegin;
821   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
822   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
823   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
824   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
825   PetscFunctionReturn(0);
826 }
827 
828 EXTERN_C_BEGIN
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
831 int MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat  B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
832 {
833   Mat       A;
834   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
835   int       ierr;
836 
837   PetscFunctionBegin;
838   /*
839     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
840     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
841     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
842     block size info so that PETSc can determine the local size properly.  The block size info is set
843     in the preallocation routine.
844   */
845   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
846   A    = ((Mat_MPISBAIJ *)B->data)->A;
847   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 EXTERN_C_END
851 
852 EXTERN_C_BEGIN
853 #undef __FUNCT__
854 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
855 int MatConvert_SBAIJ_SBAIJMUMPS(Mat A,const MatType newtype,Mat *newmat) {
856   int       ierr,size;
857   MPI_Comm  comm;
858   Mat       B=*newmat;
859   Mat_MUMPS *mumps;
860   void      (*f)(void);
861 
862   PetscFunctionBegin;
863   if (B != A) {
864     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
865   }
866 
867   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
868   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
869 
870   mumps->MatDuplicate              = A->ops->duplicate;
871   mumps->MatView                   = A->ops->view;
872   mumps->MatAssemblyEnd            = A->ops->assemblyend;
873   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
874   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
875   mumps->MatDestroy                = A->ops->destroy;
876   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
877   mumps->CleanUpMUMPS              = PETSC_FALSE;
878   mumps->isAIJ                     = PETSC_FALSE;
879 
880   B->spptr                         = (void*)mumps;
881   B->ops->duplicate                = MatDuplicate_MUMPS;
882   B->ops->view                     = MatView_AIJMUMPS;
883   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
884   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
885   B->ops->destroy                  = MatDestroy_MUMPS;
886 
887   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
888   if (size == 1) {
889     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
890                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
891     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
892                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
893   } else {
894   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
895     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr);
896     if (f) {
897       mumps->MatPreallocate = (int (*)(Mat,int,int,int*,int,int*))f;
898       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
899                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
900                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
901     }
902     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
903                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
904     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
905                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
906   }
907 
908   PetscLogInfo(0,"Using MUMPS for Cholesky factorization and solves.");
909   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
910   *newmat = B;
911   PetscFunctionReturn(0);
912 }
913 EXTERN_C_END
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatDuplicate_MUMPS"
917 int MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
918   int         ierr;
919   Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;
920 
921   PetscFunctionBegin;
922   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
923   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 
927 /*MC
928   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
929   distributed and sequential matrices via the external package MUMPS.
930 
931   If MUMPS is installed (see the manual for instructions
932   on how to declare the existence of external packages),
933   a matrix type can be constructed which invokes MUMPS solvers.
934   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
935   This matrix type is only supported for double precision real.
936 
937   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
938   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
939   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
940   for communicators controlling multiple processes.  It is recommended that you call both of
941   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
942   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
943   without data copy.
944 
945   Options Database Keys:
946 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
947 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
948 . -mat_mumps_icntl_4 <0,...,4> - print level
949 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
950 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
951 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
952 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
953 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
954 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
955 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
956 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
957 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
958 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
959 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
960 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
961 
962   Level: beginner
963 
964 .seealso: MATAIJMUMPS
965 M*/
966 
967 EXTERN_C_BEGIN
968 #undef __FUNCT__
969 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
970 int MatCreate_SBAIJMUMPS(Mat A) {
971   int ierr,size;
972 
973   PetscFunctionBegin;
974   /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */
975   /*   and SBAIJMUMPS types */
976   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr);
977   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
978   if (size == 1) {
979     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
980   } else {
981     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
982   }
983   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 EXTERN_C_END
987