xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision cb5d8e9ef36bd5726d5c4e38d977490d990864c9)
1 
2 /*
3     Provides an interface to the MUMPS_4.3.1 sparse solver
4 */
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/mat/impls/sbaij/seq/sbaij.h"
8 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #include "zmumps_c.h"
13 #else
14 #include "dmumps_c.h"
15 #endif
16 EXTERN_C_END
17 #define JOB_INIT -1
18 #define JOB_END -2
19 /* macros s.t. indices match MUMPS documentation */
20 #define ICNTL(I) icntl[(I)-1]
21 #define CNTL(I) cntl[(I)-1]
22 #define INFOG(I) infog[(I)-1]
23 #define INFO(I) info[(I)-1]
24 #define RINFOG(I) rinfog[(I)-1]
25 #define RINFO(I) rinfo[(I)-1]
26 
27 typedef struct {
28 #if defined(PETSC_USE_COMPLEX)
29   ZMUMPS_STRUC_C id;
30 #else
31   DMUMPS_STRUC_C id;
32 #endif
33   MatStructure   matstruc;
34   int            myid,size,*irn,*jcn,sym;
35   PetscScalar    *val;
36   MPI_Comm       comm_mumps;
37 
38   PetscTruth     isAIJ,CleanUpMUMPS;
39   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
40   PetscErrorCode (*MatView)(Mat,PetscViewer);
41   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
42   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
43   PetscErrorCode (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
44   PetscErrorCode (*MatDestroy)(Mat);
45   PetscErrorCode (*specialdestroy)(Mat);
46   PetscErrorCode (*MatPreallocate)(Mat,int,int,int*,int,int*);
47 } Mat_MUMPS;
48 
49 EXTERN PetscErrorCode MatDuplicate_MUMPS(Mat,MatDuplicateOption,Mat*);
50 EXTERN_C_BEGIN
51 PetscErrorCode MatConvert_SBAIJ_SBAIJMUMPS(Mat,const MatType,Mat*);
52 EXTERN_C_END
53 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
54 /*
55   input:
56     A       - matrix in mpiaij or mpisbaij (bs=1) format
57     shift   - 0: C style output triple; 1: Fortran style output triple.
58     valOnly - FALSE: spaces are allocated and values are set for the triple
59               TRUE:  only the values in v array are updated
60   output:
61     nnz     - dim of r, c, and v (number of local nonzero entries of A)
62     r, c, v - row and col index, matrix values (matrix triples)
63  */
64 PetscErrorCode MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
65   int         *ai, *aj, *bi, *bj, rstart,nz, *garray;
66   PetscErrorCode ierr;
67   int         i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol;
68   int         *row,*col;
69   PetscScalar *av, *bv,*val;
70   Mat_MUMPS   *mumps=(Mat_MUMPS*)A->spptr;
71 
72   PetscFunctionBegin;
73   if (mumps->isAIJ){
74     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
75     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
76     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
77     nz = aa->nz + bb->nz;
78     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
79     garray = mat->garray;
80     av=aa->a; bv=bb->a;
81 
82   } else {
83     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
84     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
85     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
86     if (A->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", A->bs);
87     nz = aa->nz + bb->nz;
88     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
89     garray = mat->garray;
90     av=aa->a; bv=bb->a;
91   }
92 
93   if (!valOnly){
94     ierr = PetscMalloc(nz*sizeof(int),&row);CHKERRQ(ierr);
95     ierr = PetscMalloc(nz*sizeof(int),&col);CHKERRQ(ierr);
96     ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
97     *r = row; *c = col; *v = val;
98   } else {
99     row = *r; col = *c; val = *v;
100   }
101   *nnz = nz;
102 
103   jj = 0; irow = rstart;
104   for ( i=0; i<m; i++ ) {
105     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
106     countA = ai[i+1] - ai[i];
107     countB = bi[i+1] - bi[i];
108     bjj = bj + bi[i];
109 
110     /* get jB, the starting local col index for the 2nd B-part */
111     colA_start = rstart + ajj[0]; /* the smallest col index for A */
112     j=-1;
113     do {
114       j++;
115       if (j == countB) break;
116       jcol = garray[bjj[j]];
117     } while (jcol < colA_start);
118     jB = j;
119 
120     /* B-part, smaller col index */
121     colA_start = rstart + ajj[0]; /* the smallest col index for A */
122     for (j=0; j<jB; j++){
123       jcol = garray[bjj[j]];
124       if (!valOnly){
125         row[jj] = irow + shift; col[jj] = jcol + shift;
126 
127       }
128       val[jj++] = *bv++;
129     }
130     /* A-part */
131     for (j=0; j<countA; j++){
132       if (!valOnly){
133         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
134       }
135       val[jj++] = *av++;
136     }
137     /* B-part, larger col index */
138     for (j=jB; j<countB; j++){
139       if (!valOnly){
140         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
141       }
142       val[jj++] = *bv++;
143     }
144     irow++;
145   }
146 
147   PetscFunctionReturn(0);
148 }
149 
150 EXTERN_C_BEGIN
151 #undef __FUNCT__
152 #define __FUNCT__ "MatConvert_MUMPS_Base"
153 PetscErrorCode MatConvert_MUMPS_Base(Mat A,const MatType type,Mat *newmat) \
154 {
155   PetscErrorCode ierr;
156   Mat            B=*newmat;
157   Mat_MUMPS      *mumps=(Mat_MUMPS*)A->spptr;
158   void           (*f)(void);
159 
160   PetscFunctionBegin;
161   if (B != A) {
162     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
163   }
164   B->ops->duplicate              = mumps->MatDuplicate;
165   B->ops->view                   = mumps->MatView;
166   B->ops->assemblyend            = mumps->MatAssemblyEnd;
167   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
168   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
169   B->ops->destroy                = mumps->MatDestroy;
170 
171   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr);
172   if (f) {
173     ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","",(FCNVOID)mumps->MatPreallocate);CHKERRQ(ierr);
174   }
175   ierr = PetscFree(mumps);CHKERRQ(ierr);
176 
177   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
178   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_aijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
180   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_aijmumps_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
181   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
182   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
183   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C","",PETSC_NULL);CHKERRQ(ierr);
184   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C","",PETSC_NULL);CHKERRQ(ierr);
185 
186   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
187   *newmat = B;
188   PetscFunctionReturn(0);
189 }
190 EXTERN_C_END
191 
192 #undef __FUNCT__
193 #define __FUNCT__ "MatDestroy_MUMPS"
194 PetscErrorCode MatDestroy_MUMPS(Mat A)
195 {
196   Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
197   PetscErrorCode ierr;
198   int       size=lu->size;
199   PetscErrorCode (*specialdestroy)(Mat);
200   PetscFunctionBegin;
201   if (lu->CleanUpMUMPS) {
202     /* Terminate instance, deallocate memories */
203     lu->id.job=JOB_END;
204 #if defined(PETSC_USE_COMPLEX)
205     zmumps_c(&lu->id);
206 #else
207     dmumps_c(&lu->id);
208 #endif
209     if (lu->irn) {
210       ierr = PetscFree(lu->irn);CHKERRQ(ierr);
211     }
212     if (lu->jcn) {
213       ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
214     }
215     if (size>1 && lu->val) {
216       ierr = PetscFree(lu->val);CHKERRQ(ierr);
217     }
218     ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
219   }
220   specialdestroy = lu->specialdestroy;
221   ierr = (*specialdestroy)(A);CHKERRQ(ierr);
222   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "MatDestroy_AIJMUMPS"
228 PetscErrorCode MatDestroy_AIJMUMPS(Mat A)
229 {
230   PetscErrorCode ierr;
231   int  size;
232 
233   PetscFunctionBegin;
234   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
235   if (size==1) {
236     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
237   } else {
238     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
239   }
240   PetscFunctionReturn(0);
241 }
242 
243 #undef __FUNCT__
244 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
245 PetscErrorCode MatDestroy_SBAIJMUMPS(Mat A)
246 {
247   PetscErrorCode ierr;
248   int  size;
249 
250   PetscFunctionBegin;
251   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
252   if (size==1) {
253     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,&A);CHKERRQ(ierr);
254   } else {
255     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,&A);CHKERRQ(ierr);
256   }
257   PetscFunctionReturn(0);
258 }
259 
260 #undef __FUNCT__
261 #define __FUNCT__ "MatFactorInfo_MUMPS"
262 PetscErrorCode MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
263   Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
264   PetscErrorCode ierr;
265 
266   PetscFunctionBegin;
267   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
268   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
269   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
270   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
271   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
272   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
273   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
274   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
275   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
276   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
277   if (!lu->myid && lu->id.ICNTL(11)>0) {
278     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
279     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
280     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
281     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
282     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
283     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
284 
285   }
286   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
287   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
288   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
289   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (efficiency control):                         %d \n",lu->id.ICNTL(15));CHKERRQ(ierr);
290   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
291 
292   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
293   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
294   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
295 
296   /* infomation local to each processor */
297   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
298   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
299   ierr = PetscSynchronizedFlush(A->comm);
300   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
301   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
302   ierr = PetscSynchronizedFlush(A->comm);
303   if (!lu->myid) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
304   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
305   ierr = PetscSynchronizedFlush(A->comm);
306 
307   if (!lu->myid){ /* information from the host */
308     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
309     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
310     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
311 
312     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
313     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
314     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
315     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
316     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
317     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
318     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
319     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
320     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
321     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
322     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
323     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
324     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
325     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
326     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
327     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
328     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
329      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
330   }
331 
332   PetscFunctionReturn(0);
333 }
334 
335 #undef __FUNCT__
336 #define __FUNCT__ "MatView_AIJMUMPS"
337 PetscErrorCode MatView_AIJMUMPS(Mat A,PetscViewer viewer) {
338   PetscErrorCode ierr;
339   PetscTruth        iascii;
340   PetscViewerFormat format;
341   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
342 
343   PetscFunctionBegin;
344   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
345 
346   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
347   if (iascii) {
348     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
349     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
350       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
351     }
352   }
353   PetscFunctionReturn(0);
354 }
355 
356 #undef __FUNCT__
357 #define __FUNCT__ "MatSolve_AIJMUMPS"
358 PetscErrorCode MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
359   Mat_MUMPS   *lu=(Mat_MUMPS*)A->spptr;
360   PetscScalar *array;
361   Vec         x_seq;
362   IS          iden;
363   VecScatter  scat;
364   PetscErrorCode ierr;
365 
366   PetscFunctionBegin;
367   if (lu->size > 1){
368     if (!lu->myid){
369       ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr);
370       ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr);
371     } else {
372       ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr);
373       ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr);
374     }
375     ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr);
376     ierr = ISDestroy(iden);CHKERRQ(ierr);
377 
378     ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
379     ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
380     if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
381   } else {  /* size == 1 */
382     ierr = VecCopy(b,x);CHKERRQ(ierr);
383     ierr = VecGetArray(x,&array);CHKERRQ(ierr);
384   }
385   if (!lu->myid) { /* define rhs on the host */
386 #if defined(PETSC_USE_COMPLEX)
387     lu->id.rhs = (mumps_double_complex*)array;
388 #else
389     lu->id.rhs = array;
390 #endif
391   }
392 
393   /* solve phase */
394   lu->id.job=3;
395 #if defined(PETSC_USE_COMPLEX)
396   zmumps_c(&lu->id);
397 #else
398   dmumps_c(&lu->id);
399 #endif
400   if (lu->id.INFOG(1) < 0) {
401     SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
402   }
403 
404   /* convert mumps solution x_seq to petsc mpi x */
405   if (lu->size > 1) {
406     if (!lu->myid){
407       ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
408     }
409     ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
410     ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
411     ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
412     ierr = VecDestroy(x_seq);CHKERRQ(ierr);
413   } else {
414     ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
415   }
416 
417   PetscFunctionReturn(0);
418 }
419 
420 /*
421   input:
422    F:        numeric factor
423   output:
424    nneg:     total number of negative pivots
425    nzero:    0
426    npos:     (global dimension of F) - nneg
427 */
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
431 PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
432 {
433   Mat_MUMPS  *lu =(Mat_MUMPS*)F->spptr;
434   PetscErrorCode ierr;
435   int        size;
436 
437   PetscFunctionBegin;
438   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
439   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
440   if (size > 1 && lu->id.ICNTL(13) != 1){
441     SETERRQ1(PETSC_ERR_ARG_WRONG,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13));
442   }
443   if (nneg){
444     if (!lu->myid){
445       *nneg = lu->id.INFOG(12);
446     }
447     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
448   }
449   if (nzero) *nzero = 0;
450   if (npos)  *npos  = F->M - (*nneg);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatFactorNumeric_AIJMUMPS"
456 PetscErrorCode MatFactorNumeric_AIJMUMPS(Mat A,Mat *F) {
457   Mat_MUMPS  *lu =(Mat_MUMPS*)(*F)->spptr;
458   Mat_MUMPS  *lua=(Mat_MUMPS*)(A)->spptr;
459   PetscErrorCode ierr;
460   int        rnz,nnz,nz,i,M=A->M,*ai,*aj,icntl;
461   PetscTruth valOnly,flg;
462 
463   PetscFunctionBegin;
464   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
465     (*F)->ops->solve    = MatSolve_AIJMUMPS;
466 
467     /* Initialize a MUMPS instance */
468     ierr = MPI_Comm_rank(A->comm, &lu->myid);
469     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
470     lua->myid = lu->myid; lua->size = lu->size;
471     lu->id.job = JOB_INIT;
472     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
473     lu->id.comm_fortran = lu->comm_mumps;
474 
475     /* Set mumps options */
476     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
477     lu->id.par=1;  /* host participates factorizaton and solve */
478     lu->id.sym=lu->sym;
479     if (lu->sym == 2){
480       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
481       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
482     }
483 #if defined(PETSC_USE_COMPLEX)
484     zmumps_c(&lu->id);
485 #else
486     dmumps_c(&lu->id);
487 #endif
488 
489     if (lu->size == 1){
490       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
491     } else {
492       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
493     }
494 
495     icntl=-1;
496     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
497     if ((flg && icntl > 0) || PetscLogPrintInfo) {
498       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
499     } else { /* no output */
500       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
501       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
502       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
503       lu->id.ICNTL(4) = 0;  /* level of printing, 0,1,2,3,4, default=2 */
504     }
505     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
506     icntl=-1;
507     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
508     if (flg) {
509       if (icntl== 1){
510         SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
511       } else {
512         lu->id.ICNTL(7) = icntl;
513       }
514     }
515     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
516     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
517     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
518     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
519     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
520     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
521     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
522 
523     /*
524     ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr);
525     if (flg){
526       if (icntl >-1 && icntl <3 ){
527         if (lu->myid==0) lu->id.ICNTL(16) = icntl;
528       } else {
529         SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl);
530       }
531     }
532     */
533 
534     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
535     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
536     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
537     PetscOptionsEnd();
538   }
539 
540   /* define matrix A */
541   switch (lu->id.ICNTL(18)){
542   case 0:  /* centralized assembled matrix input (size=1) */
543     if (!lu->myid) {
544       if (lua->isAIJ){
545         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
546         nz               = aa->nz;
547         ai = aa->i; aj = aa->j; lu->val = aa->a;
548       } else {
549         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
550         nz                  =  aa->nz;
551         ai = aa->i; aj = aa->j; lu->val = aa->a;
552       }
553       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
554         ierr = PetscMalloc(nz*sizeof(int),&lu->irn);CHKERRQ(ierr);
555         ierr = PetscMalloc(nz*sizeof(int),&lu->jcn);CHKERRQ(ierr);
556         nz = 0;
557         for (i=0; i<M; i++){
558           rnz = ai[i+1] - ai[i];
559           while (rnz--) {  /* Fortran row/col index! */
560             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
561           }
562         }
563       }
564     }
565     break;
566   case 3:  /* distributed assembled matrix input (size>1) */
567     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
568       valOnly = PETSC_FALSE;
569     } else {
570       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
571     }
572     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
573     break;
574   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
575   }
576 
577   /* analysis phase */
578   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
579      lu->id.n = M;
580     switch (lu->id.ICNTL(18)){
581     case 0:  /* centralized assembled matrix input */
582       if (!lu->myid) {
583         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
584         if (lu->id.ICNTL(6)>1){
585 #if defined(PETSC_USE_COMPLEX)
586           lu->id.a = (mumps_double_complex*)lu->val;
587 #else
588           lu->id.a = lu->val;
589 #endif
590         }
591       }
592       break;
593     case 3:  /* distributed assembled matrix input (size>1) */
594       lu->id.nz_loc = nnz;
595       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
596       if (lu->id.ICNTL(6)>1) {
597 #if defined(PETSC_USE_COMPLEX)
598         lu->id.a_loc = (mumps_double_complex*)lu->val;
599 #else
600         lu->id.a_loc = lu->val;
601 #endif
602       }
603       break;
604     }
605     lu->id.job=1;
606 #if defined(PETSC_USE_COMPLEX)
607     zmumps_c(&lu->id);
608 #else
609     dmumps_c(&lu->id);
610 #endif
611     if (lu->id.INFOG(1) < 0) {
612       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
613     }
614   }
615 
616   /* numerical factorization phase */
617   if(!lu->id.ICNTL(18)) {
618     if (!lu->myid) {
619 #if defined(PETSC_USE_COMPLEX)
620       lu->id.a = (mumps_double_complex*)lu->val;
621 #else
622       lu->id.a = lu->val;
623 #endif
624     }
625   } else {
626 #if defined(PETSC_USE_COMPLEX)
627     lu->id.a_loc = (mumps_double_complex*)lu->val;
628 #else
629     lu->id.a_loc = lu->val;
630 #endif
631   }
632   lu->id.job=2;
633 #if defined(PETSC_USE_COMPLEX)
634   zmumps_c(&lu->id);
635 #else
636   dmumps_c(&lu->id);
637 #endif
638   if (lu->id.INFOG(1) < 0) {
639     if (lu->id.INFO(1) == -13) {
640       SETERRQ1(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: Cannot allocate required memory %d megabytes\n",lu->id.INFO(2));
641     } else {
642       SETERRQ2(PETSC_ERR_LIB,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
643     }
644   }
645 
646   if (!lu->myid && lu->id.ICNTL(16) > 0){
647     SETERRQ1(PETSC_ERR_LIB,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
648   }
649 
650   (*F)->assembled  = PETSC_TRUE;
651   lu->matstruc     = SAME_NONZERO_PATTERN;
652   lu->CleanUpMUMPS = PETSC_TRUE;
653   PetscFunctionReturn(0);
654 }
655 
656 /* Note the Petsc r and c permutations are ignored */
657 #undef __FUNCT__
658 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
659 PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
660   Mat            B;
661   Mat_MUMPS      *lu;
662   PetscErrorCode ierr;
663 
664   PetscFunctionBegin;
665 
666   /* Create the factorization matrix */
667   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr);
668   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
669   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
670   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
671 
672   B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
673   B->factor               = FACTOR_LU;
674   lu                      = (Mat_MUMPS*)B->spptr;
675   lu->sym                 = 0;
676   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
677 
678   *F = B;
679   PetscFunctionReturn(0);
680 }
681 
682 /* Note the Petsc r permutation is ignored */
683 #undef __FUNCT__
684 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
685 PetscErrorCode MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
686   Mat       B;
687   Mat_MUMPS *lu;
688   PetscErrorCode ierr;
689 
690   PetscFunctionBegin;
691   /* Create the factorization matrix */
692   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr);
693   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
694   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
695   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
696 
697   B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
698   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
699   B->factor                     = FACTOR_CHOLESKY;
700   lu                            = (Mat_MUMPS*)B->spptr;
701   lu->sym                       = 2;
702   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
703 
704   *F = B;
705   PetscFunctionReturn(0);
706 }
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
710 PetscErrorCode MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
711   PetscErrorCode ierr;
712   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
713 
714   PetscFunctionBegin;
715   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
716 
717   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
718   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
719   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
720   PetscFunctionReturn(0);
721 }
722 
723 EXTERN_C_BEGIN
724 #undef __FUNCT__
725 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
726 PetscErrorCode MatConvert_AIJ_AIJMUMPS(Mat A,const MatType newtype,Mat *newmat)
727 {
728   PetscErrorCode ierr;
729   PetscMPIInt    size;
730   MPI_Comm       comm;
731   Mat            B=*newmat;
732   Mat_MUMPS      *mumps;
733 
734   PetscFunctionBegin;
735   if (B != A) {
736     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
737   }
738 
739   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
740   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
741 
742   mumps->MatDuplicate              = A->ops->duplicate;
743   mumps->MatView                   = A->ops->view;
744   mumps->MatAssemblyEnd            = A->ops->assemblyend;
745   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
746   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
747   mumps->MatDestroy                = A->ops->destroy;
748   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
749   mumps->CleanUpMUMPS              = PETSC_FALSE;
750   mumps->isAIJ                     = PETSC_TRUE;
751 
752   B->spptr                         = (void*)mumps;
753   B->ops->duplicate                = MatDuplicate_MUMPS;
754   B->ops->view                     = MatView_AIJMUMPS;
755   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
756   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
757   B->ops->destroy                  = MatDestroy_MUMPS;
758 
759   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
760   if (size == 1) {
761     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
762                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
763     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
764                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
765   } else {
766     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
767                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
768     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
769                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
770   }
771 
772   PetscLogInfo(0,"Using MUMPS for LU factorization and solves.");
773   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
774   *newmat = B;
775   PetscFunctionReturn(0);
776 }
777 EXTERN_C_END
778 
779 /*MC
780   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
781   and sequential matrices via the external package MUMPS.
782 
783   If MUMPS is installed (see the manual for instructions
784   on how to declare the existence of external packages),
785   a matrix type can be constructed which invokes MUMPS solvers.
786   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
787 
788   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
789   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
790   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
791   for communicators controlling multiple processes.  It is recommended that you call both of
792   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
793   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
794   without data copy.
795 
796   Options Database Keys:
797 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
798 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
799 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
800 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
801 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
802 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
803 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
804 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
805 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
806 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
807 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
808 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
809 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
810 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
811 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
812 
813   Level: beginner
814 
815 .seealso: MATSBAIJMUMPS
816 M*/
817 
818 EXTERN_C_BEGIN
819 #undef __FUNCT__
820 #define __FUNCT__ "MatCreate_AIJMUMPS"
821 PetscErrorCode MatCreate_AIJMUMPS(Mat A)
822 {
823   PetscErrorCode ierr;
824   int      size;
825   Mat      A_diag;
826   MPI_Comm comm;
827 
828   PetscFunctionBegin;
829   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
830   /*   and AIJMUMPS types */
831   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr);
832   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
833   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
834   if (size == 1) {
835     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
836   } else {
837     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
838     A_diag = ((Mat_MPIAIJ *)A->data)->A;
839     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,&A_diag);CHKERRQ(ierr);
840   }
841   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,&A);CHKERRQ(ierr);
842   PetscFunctionReturn(0);
843 }
844 EXTERN_C_END
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
848 PetscErrorCode MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode)
849 {
850   PetscErrorCode ierr;
851   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
852 
853   PetscFunctionBegin;
854   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
855   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
856   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
857   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
858   PetscFunctionReturn(0);
859 }
860 
861 EXTERN_C_BEGIN
862 #undef __FUNCT__
863 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
864 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat  B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
865 {
866   Mat       A;
867   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
868   PetscErrorCode ierr;
869 
870   PetscFunctionBegin;
871   /*
872     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
873     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
874     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
875     block size info so that PETSc can determine the local size properly.  The block size info is set
876     in the preallocation routine.
877   */
878   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
879   A    = ((Mat_MPISBAIJ *)B->data)->A;
880   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr);
881   PetscFunctionReturn(0);
882 }
883 EXTERN_C_END
884 
885 EXTERN_C_BEGIN
886 #undef __FUNCT__
887 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
888 PetscErrorCode MatConvert_SBAIJ_SBAIJMUMPS(Mat A,const MatType newtype,Mat *newmat)
889 {
890   PetscErrorCode ierr;
891   PetscMPIInt    size;
892   MPI_Comm       comm;
893   Mat            B=*newmat;
894   Mat_MUMPS      *mumps;
895   void           (*f)(void);
896 
897   PetscFunctionBegin;
898   if (B != A) {
899     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
900   }
901 
902   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
903   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
904 
905   mumps->MatDuplicate              = A->ops->duplicate;
906   mumps->MatView                   = A->ops->view;
907   mumps->MatAssemblyEnd            = A->ops->assemblyend;
908   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
909   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
910   mumps->MatDestroy                = A->ops->destroy;
911   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
912   mumps->CleanUpMUMPS              = PETSC_FALSE;
913   mumps->isAIJ                     = PETSC_FALSE;
914 
915   B->spptr                         = (void*)mumps;
916   B->ops->duplicate                = MatDuplicate_MUMPS;
917   B->ops->view                     = MatView_AIJMUMPS;
918   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
919   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
920   B->ops->destroy                  = MatDestroy_MUMPS;
921 
922   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
923   if (size == 1) {
924     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
925                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
926     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
927                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
928   } else {
929   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
930     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr);
931     if (f) { /* This case should always be true when this routine is called */
932       mumps->MatPreallocate = (PetscErrorCode (*)(Mat,int,int,int*,int,int*))f;
933       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
934                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
935                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
936     }
937     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
938                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
939     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
940                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
941   }
942 
943   PetscLogInfo(0,"Using MUMPS for Cholesky factorization and solves.");
944   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
945   *newmat = B;
946   PetscFunctionReturn(0);
947 }
948 EXTERN_C_END
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatDuplicate_MUMPS"
952 PetscErrorCode MatDuplicate_MUMPS(Mat A, MatDuplicateOption op, Mat *M) {
953   PetscErrorCode ierr;
954   Mat_MUMPS   *lu=(Mat_MUMPS *)A->spptr;
955 
956   PetscFunctionBegin;
957   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
958   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
959   PetscFunctionReturn(0);
960 }
961 
962 /*MC
963   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
964   distributed and sequential matrices via the external package MUMPS.
965 
966   If MUMPS is installed (see the manual for instructions
967   on how to declare the existence of external packages),
968   a matrix type can be constructed which invokes MUMPS solvers.
969   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
970 
971   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
972   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
973   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
974   for communicators controlling multiple processes.  It is recommended that you call both of
975   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
976   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
977   without data copy.
978 
979   Options Database Keys:
980 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
981 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
982 . -mat_mumps_icntl_4 <0,...,4> - print level
983 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
984 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
985 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
986 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
987 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
988 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
989 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
990 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
991 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
992 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
993 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
994 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
995 
996   Level: beginner
997 
998 .seealso: MATAIJMUMPS
999 M*/
1000 
1001 EXTERN_C_BEGIN
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
1004 PetscErrorCode MatCreate_SBAIJMUMPS(Mat A)
1005 {
1006   PetscErrorCode ierr;
1007   int size;
1008 
1009   PetscFunctionBegin;
1010   /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */
1011   /*   and SBAIJMUMPS types */
1012   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr);
1013   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
1014   if (size == 1) {
1015     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1016   } else {
1017     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1018   }
1019   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr);
1020   PetscFunctionReturn(0);
1021 }
1022 EXTERN_C_END
1023