xref: /petsc/src/mat/impls/aij/mpi/mumps/mumps.c (revision 5968eb51c36a0eee2a17bcd341216da8f7bdf0eb)
1 /*$Id: mumps.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3     Provides an interface to the MUMPS_4.3.1 sparse solver
4 */
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #include "src/mat/impls/sbaij/seq/sbaij.h"
8 #include "src/mat/impls/sbaij/mpi/mpisbaij.h"
9 
10 EXTERN_C_BEGIN
11 #if defined(PETSC_USE_COMPLEX)
12 #include "zmumps_c.h"
13 #else
14 #include "dmumps_c.h"
15 #endif
16 EXTERN_C_END
17 #define JOB_INIT -1
18 #define JOB_END -2
19 /* macros s.t. indices match MUMPS documentation */
20 #define ICNTL(I) icntl[(I)-1]
21 #define CNTL(I) cntl[(I)-1]
22 #define INFOG(I) infog[(I)-1]
23 #define INFO(I) info[(I)-1]
24 #define RINFOG(I) rinfog[(I)-1]
25 #define RINFO(I) rinfo[(I)-1]
26 
27 typedef struct {
28 #if defined(PETSC_USE_COMPLEX)
29   ZMUMPS_STRUC_C id;
30 #else
31   DMUMPS_STRUC_C id;
32 #endif
33   MatStructure   matstruc;
34   int            myid,size,*irn,*jcn,sym;
35   PetscScalar    *val;
36   MPI_Comm       comm_mumps;
37 
38   PetscTruth     isAIJ,CleanUpMUMPS;
39   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
40   int (*MatView)(Mat,PetscViewer);
41   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
42   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
43   int (*MatCholeskyFactorSymbolic)(Mat,IS,MatFactorInfo*,Mat*);
44   int (*MatDestroy)(Mat);
45   int (*specialdestroy)(Mat);
46   int (*MatPreallocate)(Mat,int,int,int*,int,int*);
47 } Mat_MUMPS;
48 
49 EXTERN int MatDuplicate_AIJMUMPS(Mat,MatDuplicateOption,Mat*);
50 EXTERN int MatDuplicate_SBAIJMUMPS(Mat,MatDuplicateOption,Mat*);
51 EXTERN_C_BEGIN
52 int MatConvert_SBAIJ_SBAIJMUMPS(Mat,const MatType,Mat*);
53 EXTERN_C_END
54 /* convert Petsc mpiaij matrix to triples: row[nz], col[nz], val[nz] */
55 /*
56   input:
57     A       - matrix in mpiaij or mpisbaij (bs=1) format
58     shift   - 0: C style output triple; 1: Fortran style output triple.
59     valOnly - FALSE: spaces are allocated and values are set for the triple
60               TRUE:  only the values in v array are updated
61   output:
62     nnz     - dim of r, c, and v (number of local nonzero entries of A)
63     r, c, v - row and col index, matrix values (matrix triples)
64  */
65 int MatConvertToTriples(Mat A,int shift,PetscTruth valOnly,int *nnz,int **r, int **c, PetscScalar **v) {
66   int         *ai, *aj, *bi, *bj, rstart,nz, *garray;
67   int         ierr,i,j,jj,jB,irow,m=A->m,*ajj,*bjj,countA,countB,colA_start,jcol;
68   int         *row,*col;
69   PetscScalar *av, *bv,*val;
70   Mat_MUMPS   *mumps=(Mat_MUMPS*)A->spptr;
71 
72   PetscFunctionBegin;
73   if (mumps->isAIJ){
74     Mat_MPIAIJ    *mat =  (Mat_MPIAIJ*)A->data;
75     Mat_SeqAIJ    *aa=(Mat_SeqAIJ*)(mat->A)->data;
76     Mat_SeqAIJ    *bb=(Mat_SeqAIJ*)(mat->B)->data;
77     nz = aa->nz + bb->nz;
78     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
79     garray = mat->garray;
80     av=aa->a; bv=bb->a;
81 
82   } else {
83     Mat_MPISBAIJ  *mat =  (Mat_MPISBAIJ*)A->data;
84     Mat_SeqSBAIJ  *aa=(Mat_SeqSBAIJ*)(mat->A)->data;
85     Mat_SeqBAIJ    *bb=(Mat_SeqBAIJ*)(mat->B)->data;
86     if (mat->bs > 1) SETERRQ1(PETSC_ERR_SUP," bs=%d is not supported yet\n", mat->bs);
87     nz = aa->nz + bb->nz;
88     ai=aa->i; aj=aa->j; bi=bb->i; bj=bb->j; rstart= mat->rstart;
89     garray = mat->garray;
90     av=aa->a; bv=bb->a;
91   }
92 
93   if (!valOnly){
94     ierr = PetscMalloc(nz*sizeof(int),&row);CHKERRQ(ierr);
95     ierr = PetscMalloc(nz*sizeof(int),&col);CHKERRQ(ierr);
96     ierr = PetscMalloc(nz*sizeof(PetscScalar),&val);CHKERRQ(ierr);
97     *r = row; *c = col; *v = val;
98   } else {
99     row = *r; col = *c; val = *v;
100   }
101   *nnz = nz;
102 
103   jj = 0; irow = rstart;
104   for ( i=0; i<m; i++ ) {
105     ajj = aj + ai[i];                 /* ptr to the beginning of this row */
106     countA = ai[i+1] - ai[i];
107     countB = bi[i+1] - bi[i];
108     bjj = bj + bi[i];
109 
110     /* get jB, the starting local col index for the 2nd B-part */
111     colA_start = rstart + ajj[0]; /* the smallest col index for A */
112     j=-1;
113     do {
114       j++;
115       if (j == countB) break;
116       jcol = garray[bjj[j]];
117     } while (jcol < colA_start);
118     jB = j;
119 
120     /* B-part, smaller col index */
121     colA_start = rstart + ajj[0]; /* the smallest col index for A */
122     for (j=0; j<jB; j++){
123       jcol = garray[bjj[j]];
124       if (!valOnly){
125         row[jj] = irow + shift; col[jj] = jcol + shift;
126 
127       }
128       val[jj++] = *bv++;
129     }
130     /* A-part */
131     for (j=0; j<countA; j++){
132       if (!valOnly){
133         row[jj] = irow + shift; col[jj] = rstart + ajj[j] + shift;
134       }
135       val[jj++] = *av++;
136     }
137     /* B-part, larger col index */
138     for (j=jB; j<countB; j++){
139       if (!valOnly){
140         row[jj] = irow + shift; col[jj] = garray[bjj[j]] + shift;
141       }
142       val[jj++] = *bv++;
143     }
144     irow++;
145   }
146 
147   PetscFunctionReturn(0);
148 }
149 
150 EXTERN_C_BEGIN
151 #undef __FUNCT__
152 #define __FUNCT__ "MatConvert_MUMPS_Base"
153 int MatConvert_MUMPS_Base(Mat A,const MatType type,Mat *newmat) {
154   int       ierr;
155   Mat       B=*newmat;
156   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
157 
158   PetscFunctionBegin;
159   if (B != A) {
160     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
161   }
162   B->ops->duplicate              = mumps->MatDuplicate;
163   B->ops->view                   = mumps->MatView;
164   B->ops->assemblyend            = mumps->MatAssemblyEnd;
165   B->ops->lufactorsymbolic       = mumps->MatLUFactorSymbolic;
166   B->ops->choleskyfactorsymbolic = mumps->MatCholeskyFactorSymbolic;
167   B->ops->destroy                = mumps->MatDestroy;
168   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
169   ierr = PetscFree(mumps);CHKERRQ(ierr);
170   *newmat = B;
171   PetscFunctionReturn(0);
172 }
173 EXTERN_C_END
174 
175 #undef __FUNCT__
176 #define __FUNCT__ "MatDestroy_MUMPS"
177 int MatDestroy_MUMPS(Mat A) {
178   Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
179   int       ierr,size=lu->size;
180   int       (*specialdestroy)(Mat);
181   PetscFunctionBegin;
182   if (lu->CleanUpMUMPS) {
183     /* Terminate instance, deallocate memories */
184     lu->id.job=JOB_END;
185 #if defined(PETSC_USE_COMPLEX)
186     zmumps_c(&lu->id);
187 #else
188     dmumps_c(&lu->id);
189 #endif
190     if (lu->irn) {
191       ierr = PetscFree(lu->irn);CHKERRQ(ierr);
192     }
193     if (lu->jcn) {
194       ierr = PetscFree(lu->jcn);CHKERRQ(ierr);
195     }
196     if (size>1 && lu->val) {
197       ierr = PetscFree(lu->val);CHKERRQ(ierr);
198     }
199     ierr = MPI_Comm_free(&(lu->comm_mumps));CHKERRQ(ierr);
200   }
201   specialdestroy = lu->specialdestroy;
202   ierr = (*specialdestroy)(A);CHKERRQ(ierr);
203   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
204   PetscFunctionReturn(0);
205 }
206 
207 #undef __FUNCT__
208 #define __FUNCT__ "MatDestroy_AIJMUMPS"
209 int MatDestroy_AIJMUMPS(Mat A) {
210   int ierr, size;
211 
212   PetscFunctionBegin;
213   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
214   if (size==1) {
215     ierr = MatConvert_MUMPS_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
216   } else {
217     ierr = MatConvert_MUMPS_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
218   }
219   PetscFunctionReturn(0);
220 }
221 
222 #undef __FUNCT__
223 #define __FUNCT__ "MatDestroy_SBAIJMUMPS"
224 int MatDestroy_SBAIJMUMPS(Mat A) {
225   int ierr, size;
226 
227   PetscFunctionBegin;
228   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
229   if (size==1) {
230     ierr = MatConvert_MUMPS_Base(A,MATSEQSBAIJ,&A);CHKERRQ(ierr);
231   } else {
232     ierr = MatConvert_MUMPS_Base(A,MATMPISBAIJ,&A);CHKERRQ(ierr);
233   }
234   PetscFunctionReturn(0);
235 }
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "MatFactorInfo_MUMPS"
239 int MatFactorInfo_MUMPS(Mat A,PetscViewer viewer) {
240   Mat_MUMPS *lu=(Mat_MUMPS*)A->spptr;
241   int       ierr;
242 
243   PetscFunctionBegin;
244   ierr = PetscViewerASCIIPrintf(viewer,"MUMPS run parameters:\n");CHKERRQ(ierr);
245   ierr = PetscViewerASCIIPrintf(viewer,"  SYM (matrix type):                  %d \n",lu->id.sym);CHKERRQ(ierr);
246   ierr = PetscViewerASCIIPrintf(viewer,"  PAR (host participation):           %d \n",lu->id.par);CHKERRQ(ierr);
247   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(4) (level of printing):       %d \n",lu->id.ICNTL(4));CHKERRQ(ierr);
248   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(5) (input mat struct):        %d \n",lu->id.ICNTL(5));CHKERRQ(ierr);
249   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(6) (matrix prescaling):       %d \n",lu->id.ICNTL(6));CHKERRQ(ierr);
250   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(7) (matrix ordering):         %d \n",lu->id.ICNTL(7));CHKERRQ(ierr);
251   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(9) (A/A^T x=b is solved):     %d \n",lu->id.ICNTL(9));CHKERRQ(ierr);
252   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(10) (max num of refinements): %d \n",lu->id.ICNTL(10));CHKERRQ(ierr);
253   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(11) (error analysis):         %d \n",lu->id.ICNTL(11));CHKERRQ(ierr);
254   if (lu->myid == 0 && lu->id.ICNTL(11)>0) {
255     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(4) (inf norm of input mat):        %g\n",lu->id.RINFOG(4));CHKERRQ(ierr);
256     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(5) (inf norm of solution):         %g\n",lu->id.RINFOG(5));CHKERRQ(ierr);
257     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(6) (inf norm of residual):         %g\n",lu->id.RINFOG(6));CHKERRQ(ierr);
258     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(7),RINFOG(8) (backward error est): %g, %g\n",lu->id.RINFOG(7),lu->id.RINFOG(8));CHKERRQ(ierr);
259     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(9) (error estimate):               %g \n",lu->id.RINFOG(9));CHKERRQ(ierr);
260     ierr = PetscPrintf(PETSC_COMM_SELF,"        RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n",lu->id.RINFOG(10),lu->id.RINFOG(11));CHKERRQ(ierr);
261 
262   }
263   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(12) (efficiency control):                         %d \n",lu->id.ICNTL(12));CHKERRQ(ierr);
264   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(13) (efficiency control):                         %d \n",lu->id.ICNTL(13));CHKERRQ(ierr);
265   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(14) (percentage of estimated workspace increase): %d \n",lu->id.ICNTL(14));CHKERRQ(ierr);
266   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(15) (efficiency control):                         %d \n",lu->id.ICNTL(15));CHKERRQ(ierr);
267   ierr = PetscViewerASCIIPrintf(viewer,"  ICNTL(18) (input mat struct):                           %d \n",lu->id.ICNTL(18));CHKERRQ(ierr);
268 
269   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(1) (relative pivoting threshold):      %g \n",lu->id.CNTL(1));CHKERRQ(ierr);
270   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(2) (stopping criterion of refinement): %g \n",lu->id.CNTL(2));CHKERRQ(ierr);
271   ierr = PetscViewerASCIIPrintf(viewer,"  CNTL(3) (absolute pivoting threshold):      %g \n",lu->id.CNTL(3));CHKERRQ(ierr);
272 
273   /* infomation local to each processor */
274   if (lu->myid == 0) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(1) (local estimated flops for the elimination after analysis): \n");CHKERRQ(ierr);
275   ierr = PetscSynchronizedPrintf(A->comm,"             [%d] %g \n",lu->myid,lu->id.RINFO(1));CHKERRQ(ierr);
276   ierr = PetscSynchronizedFlush(A->comm);
277   if (lu->myid == 0) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(2) (local estimated flops for the assembly after factorization): \n");CHKERRQ(ierr);
278   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(2));CHKERRQ(ierr);
279   ierr = PetscSynchronizedFlush(A->comm);
280   if (lu->myid == 0) ierr = PetscPrintf(PETSC_COMM_SELF, "      RINFO(3) (local estimated flops for the elimination after factorization): \n");CHKERRQ(ierr);
281   ierr = PetscSynchronizedPrintf(A->comm,"             [%d]  %g \n",lu->myid,lu->id.RINFO(3));CHKERRQ(ierr);
282   ierr = PetscSynchronizedFlush(A->comm);
283 
284   if (lu->myid == 0){ /* information from the host */
285     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(1) (global estimated flops for the elimination after analysis): %g \n",lu->id.RINFOG(1));CHKERRQ(ierr);
286     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(2) (global estimated flops for the assembly after factorization): %g \n",lu->id.RINFOG(2));CHKERRQ(ierr);
287     ierr = PetscViewerASCIIPrintf(viewer,"  RINFOG(3) (global estimated flops for the elimination after factorization): %g \n",lu->id.RINFOG(3));CHKERRQ(ierr);
288 
289     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(3));CHKERRQ(ierr);
290     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d \n",lu->id.INFOG(4));CHKERRQ(ierr);
291     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(5) (estimated maximum front size in the complete tree): %d \n",lu->id.INFOG(5));CHKERRQ(ierr);
292     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(6) (number of nodes in the complete tree): %d \n",lu->id.INFOG(6));CHKERRQ(ierr);
293     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(7) (ordering option effectively uese after analysis): %d \n",lu->id.INFOG(7));CHKERRQ(ierr);
294     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d \n",lu->id.INFOG(8));CHKERRQ(ierr);
295     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(9) (total real space store the matrix factors after analysis): %d \n",lu->id.INFOG(9));CHKERRQ(ierr);
296     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(10) (total integer space store the matrix factors after analysis): %d \n",lu->id.INFOG(10));CHKERRQ(ierr);
297     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(11) (order of largest frontal matrix): %d \n",lu->id.INFOG(11));CHKERRQ(ierr);
298     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(12) (number of off-diagonal pivots): %d \n",lu->id.INFOG(12));CHKERRQ(ierr);
299     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(13) (number of delayed pivots after factorization): %d \n",lu->id.INFOG(13));CHKERRQ(ierr);
300     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(14) (number of memory compress after factorization): %d \n",lu->id.INFOG(14));CHKERRQ(ierr);
301     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(15) (number of steps of iterative refinement after solution): %d \n",lu->id.INFOG(15));CHKERRQ(ierr);
302     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(16) (estimated size (in million of bytes) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d \n",lu->id.INFOG(16));CHKERRQ(ierr);
303     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d \n",lu->id.INFOG(17));CHKERRQ(ierr);
304     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d \n",lu->id.INFOG(18));CHKERRQ(ierr);
305     ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d \n",lu->id.INFOG(19));CHKERRQ(ierr);
306      ierr = PetscViewerASCIIPrintf(viewer,"  INFOG(20) (estimated number of entries in the factors): %d \n",lu->id.INFOG(20));CHKERRQ(ierr);
307   }
308 
309   PetscFunctionReturn(0);
310 }
311 
312 #undef __FUNCT__
313 #define __FUNCT__ "MatView_AIJMUMPS"
314 int MatView_AIJMUMPS(Mat A,PetscViewer viewer) {
315   int               ierr;
316   PetscTruth        isascii;
317   PetscViewerFormat format;
318   Mat_MUMPS         *mumps=(Mat_MUMPS*)(A->spptr);
319 
320   PetscFunctionBegin;
321   ierr = (*mumps->MatView)(A,viewer);CHKERRQ(ierr);
322 
323   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
324   if (isascii) {
325     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
326     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
327       ierr = MatFactorInfo_MUMPS(A,viewer);CHKERRQ(ierr);
328     }
329   }
330   PetscFunctionReturn(0);
331 }
332 
333 #undef __FUNCT__
334 #define __FUNCT__ "MatSolve_AIJMUMPS"
335 int MatSolve_AIJMUMPS(Mat A,Vec b,Vec x) {
336   Mat_MUMPS   *lu=(Mat_MUMPS*)A->spptr;
337   PetscScalar *array;
338   Vec         x_seq;
339   IS          iden;
340   VecScatter  scat;
341   int         ierr;
342 
343   PetscFunctionBegin;
344   if (lu->size > 1){
345     if (!lu->myid){
346       ierr = VecCreateSeq(PETSC_COMM_SELF,A->N,&x_seq);CHKERRQ(ierr);
347       ierr = ISCreateStride(PETSC_COMM_SELF,A->N,0,1,&iden);CHKERRQ(ierr);
348     } else {
349       ierr = VecCreateSeq(PETSC_COMM_SELF,0,&x_seq);CHKERRQ(ierr);
350       ierr = ISCreateStride(PETSC_COMM_SELF,0,0,1,&iden);CHKERRQ(ierr);
351     }
352     ierr = VecScatterCreate(b,iden,x_seq,iden,&scat);CHKERRQ(ierr);
353     ierr = ISDestroy(iden);CHKERRQ(ierr);
354 
355     ierr = VecScatterBegin(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
356     ierr = VecScatterEnd(b,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
357     if (!lu->myid) {ierr = VecGetArray(x_seq,&array);CHKERRQ(ierr);}
358   } else {  /* size == 1 */
359     ierr = VecCopy(b,x);CHKERRQ(ierr);
360     ierr = VecGetArray(x,&array);CHKERRQ(ierr);
361   }
362   if (!lu->myid) { /* define rhs on the host */
363 #if defined(PETSC_USE_COMPLEX)
364     lu->id.rhs = (mumps_double_complex*)array;
365 #else
366     lu->id.rhs = array;
367 #endif
368   }
369 
370   /* solve phase */
371   lu->id.job=3;
372 #if defined(PETSC_USE_COMPLEX)
373   zmumps_c(&lu->id);
374 #else
375   dmumps_c(&lu->id);
376 #endif
377   if (lu->id.INFOG(1) < 0) {
378     SETERRQ1(1,"Error reported by MUMPS in solve phase: INFOG(1)=%d\n",lu->id.INFOG(1));
379   }
380 
381   /* convert mumps solution x_seq to petsc mpi x */
382   if (lu->size > 1) {
383     if (!lu->myid){
384       ierr = VecRestoreArray(x_seq,&array);CHKERRQ(ierr);
385     }
386     ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
387     ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
388     ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
389     ierr = VecDestroy(x_seq);CHKERRQ(ierr);
390   } else {
391     ierr = VecRestoreArray(x,&array);CHKERRQ(ierr);
392   }
393 
394   PetscFunctionReturn(0);
395 }
396 
397 /*
398   input:
399    F:        numeric factor
400   output:
401    nneg:     total number of negative pivots
402    nzero:    0
403    npos:     (global dimension of F) - nneg
404 */
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatGetInertia_SBAIJMUMPS"
408 int MatGetInertia_SBAIJMUMPS(Mat F,int *nneg,int *nzero,int *npos)
409 {
410   Mat_MUMPS  *lu =(Mat_MUMPS*)F->spptr;
411   int        ierr,neg,zero,pos,size;
412 
413   PetscFunctionBegin;
414   ierr = MPI_Comm_size(F->comm,&size);CHKERRQ(ierr);
415   /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
416   if (size > 1 && lu->id.ICNTL(13) != 1){
417     SETERRQ1(1,"ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia\n",lu->id.INFOG(13));
418   }
419   if (nneg){
420     if (!lu->myid){
421       *nneg = lu->id.INFOG(12);
422     }
423     ierr = MPI_Bcast(nneg,1,MPI_INT,0,lu->comm_mumps);CHKERRQ(ierr);
424   }
425   if (nzero) *nzero = 0;
426   if (npos)  *npos  = F->M - (*nneg);
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "MatFactorNumeric_MPIAIJMUMPS"
432 int MatFactorNumeric_AIJMUMPS(Mat A,Mat *F) {
433   Mat_MUMPS  *lu =(Mat_MUMPS*)(*F)->spptr;
434   Mat_MUMPS  *lua=(Mat_MUMPS*)(A)->spptr;
435   int        rnz,nnz,ierr,nz,i,M=A->M,*ai,*aj,icntl;
436   PetscTruth valOnly,flg;
437 
438   PetscFunctionBegin;
439   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
440     (*F)->ops->solve    = MatSolve_AIJMUMPS;
441 
442     /* Initialize a MUMPS instance */
443     ierr = MPI_Comm_rank(A->comm, &lu->myid);
444     ierr = MPI_Comm_size(A->comm,&lu->size);CHKERRQ(ierr);
445     lua->myid = lu->myid; lua->size = lu->size;
446     lu->id.job = JOB_INIT;
447     ierr = MPI_Comm_dup(A->comm,&(lu->comm_mumps));CHKERRQ(ierr);
448     lu->id.comm_fortran = lu->comm_mumps;
449 
450     /* Set mumps options */
451     ierr = PetscOptionsBegin(A->comm,A->prefix,"MUMPS Options","Mat");CHKERRQ(ierr);
452     lu->id.par=1;  /* host participates factorizaton and solve */
453     lu->id.sym=lu->sym;
454     if (lu->sym == 2){
455       ierr = PetscOptionsInt("-mat_mumps_sym","SYM: (1,2)","None",lu->id.sym,&icntl,&flg);CHKERRQ(ierr);
456       if (flg && icntl == 1) lu->id.sym=icntl;  /* matrix is spd */
457     }
458 #if defined(PETSC_USE_COMPLEX)
459   zmumps_c(&lu->id);
460 #else
461   dmumps_c(&lu->id);
462 #endif
463 
464     if (lu->size == 1){
465       lu->id.ICNTL(18) = 0;   /* centralized assembled matrix input */
466     } else {
467       lu->id.ICNTL(18) = 3;   /* distributed assembled matrix input */
468     }
469 
470     icntl=-1;
471     ierr = PetscOptionsInt("-mat_mumps_icntl_4","ICNTL(4): level of printing (0 to 4)","None",lu->id.ICNTL(4),&icntl,&flg);CHKERRQ(ierr);
472     if (flg && icntl > 0) {
473       lu->id.ICNTL(4)=icntl; /* and use mumps default icntl(i), i=1,2,3 */
474     } else { /* no output */
475       lu->id.ICNTL(1) = 0;  /* error message, default= 6 */
476       lu->id.ICNTL(2) = -1; /* output stream for diagnostic printing, statistics, and warning. default=0 */
477       lu->id.ICNTL(3) = -1; /* output stream for global information, default=6 */
478       lu->id.ICNTL(4) = 0;  /* level of printing, 0,1,2,3,4, default=2 */
479     }
480     ierr = PetscOptionsInt("-mat_mumps_icntl_6","ICNTL(6): matrix prescaling (0 to 7)","None",lu->id.ICNTL(6),&lu->id.ICNTL(6),PETSC_NULL);CHKERRQ(ierr);
481     icntl=-1;
482     ierr = PetscOptionsInt("-mat_mumps_icntl_7","ICNTL(7): matrix ordering (0 to 7)","None",lu->id.ICNTL(7),&icntl,&flg);CHKERRQ(ierr);
483     if (flg) {
484       if (icntl== 1){
485         SETERRQ(PETSC_ERR_SUP,"pivot order be set by the user in PERM_IN -- not supported by the PETSc/MUMPS interface\n");
486       } else {
487         lu->id.ICNTL(7) = icntl;
488       }
489     }
490     ierr = PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): A or A^T x=b to be solved. 1: A; otherwise: A^T","None",lu->id.ICNTL(9),&lu->id.ICNTL(9),PETSC_NULL);CHKERRQ(ierr);
491     ierr = PetscOptionsInt("-mat_mumps_icntl_10","ICNTL(10): max num of refinements","None",lu->id.ICNTL(10),&lu->id.ICNTL(10),PETSC_NULL);CHKERRQ(ierr);
492     ierr = PetscOptionsInt("-mat_mumps_icntl_11","ICNTL(11): error analysis, a positive value returns statistics (by -ksp_view)","None",lu->id.ICNTL(11),&lu->id.ICNTL(11),PETSC_NULL);CHKERRQ(ierr);
493     ierr = PetscOptionsInt("-mat_mumps_icntl_12","ICNTL(12): efficiency control","None",lu->id.ICNTL(12),&lu->id.ICNTL(12),PETSC_NULL);CHKERRQ(ierr);
494     ierr = PetscOptionsInt("-mat_mumps_icntl_13","ICNTL(13): efficiency control","None",lu->id.ICNTL(13),&lu->id.ICNTL(13),PETSC_NULL);CHKERRQ(ierr);
495     ierr = PetscOptionsInt("-mat_mumps_icntl_14","ICNTL(14): percentage of estimated workspace increase","None",lu->id.ICNTL(14),&lu->id.ICNTL(14),PETSC_NULL);CHKERRQ(ierr);
496     ierr = PetscOptionsInt("-mat_mumps_icntl_15","ICNTL(15): efficiency control","None",lu->id.ICNTL(15),&lu->id.ICNTL(15),PETSC_NULL);CHKERRQ(ierr);
497 
498     /*
499     ierr = PetscOptionsInt("-mat_mumps_icntl_16","ICNTL(16): 1: rank detection; 2: rank detection and nullspace","None",lu->id.ICNTL(16),&icntl,&flg);CHKERRQ(ierr);
500     if (flg){
501       if (icntl >-1 && icntl <3 ){
502         if (lu->myid==0) lu->id.ICNTL(16) = icntl;
503       } else {
504         SETERRQ1(PETSC_ERR_SUP,"ICNTL(16)=%d -- not supported\n",icntl);
505       }
506     }
507     */
508 
509     ierr = PetscOptionsReal("-mat_mumps_cntl_1","CNTL(1): relative pivoting threshold","None",lu->id.CNTL(1),&lu->id.CNTL(1),PETSC_NULL);CHKERRQ(ierr);
510     ierr = PetscOptionsReal("-mat_mumps_cntl_2","CNTL(2): stopping criterion of refinement","None",lu->id.CNTL(2),&lu->id.CNTL(2),PETSC_NULL);CHKERRQ(ierr);
511     ierr = PetscOptionsReal("-mat_mumps_cntl_3","CNTL(3): absolute pivoting threshold","None",lu->id.CNTL(3),&lu->id.CNTL(3),PETSC_NULL);CHKERRQ(ierr);
512     PetscOptionsEnd();
513   }
514 
515   /* define matrix A */
516   switch (lu->id.ICNTL(18)){
517   case 0:  /* centralized assembled matrix input (size=1) */
518     if (!lu->myid) {
519       if (lua->isAIJ){
520         Mat_SeqAIJ   *aa = (Mat_SeqAIJ*)A->data;
521         nz               = aa->nz;
522         ai = aa->i; aj = aa->j; lu->val = aa->a;
523       } else {
524         Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ*)A->data;
525         nz                  =  aa->nz;
526         ai = aa->i; aj = aa->j; lu->val = aa->a;
527       }
528       if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization, get irn and jcn */
529         ierr = PetscMalloc(nz*sizeof(int),&lu->irn);CHKERRQ(ierr);
530         ierr = PetscMalloc(nz*sizeof(int),&lu->jcn);CHKERRQ(ierr);
531         nz = 0;
532         for (i=0; i<M; i++){
533           rnz = ai[i+1] - ai[i];
534           while (rnz--) {  /* Fortran row/col index! */
535             lu->irn[nz] = i+1; lu->jcn[nz] = (*aj)+1; aj++; nz++;
536           }
537         }
538       }
539     }
540     break;
541   case 3:  /* distributed assembled matrix input (size>1) */
542     if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
543       valOnly = PETSC_FALSE;
544     } else {
545       valOnly = PETSC_TRUE; /* only update mat values, not row and col index */
546     }
547     ierr = MatConvertToTriples(A,1,valOnly, &nnz, &lu->irn, &lu->jcn, &lu->val);CHKERRQ(ierr);
548     break;
549   default: SETERRQ(PETSC_ERR_SUP,"Matrix input format is not supported by MUMPS.");
550   }
551 
552   /* analysis phase */
553   if (lu->matstruc == DIFFERENT_NONZERO_PATTERN){
554      lu->id.n = M;
555     switch (lu->id.ICNTL(18)){
556     case 0:  /* centralized assembled matrix input */
557       if (!lu->myid) {
558         lu->id.nz =nz; lu->id.irn=lu->irn; lu->id.jcn=lu->jcn;
559         if (lu->id.ICNTL(6)>1){
560 #if defined(PETSC_USE_COMPLEX)
561           lu->id.a = (mumps_double_complex*)lu->val;
562 #else
563           lu->id.a = lu->val;
564 #endif
565         }
566       }
567       break;
568     case 3:  /* distributed assembled matrix input (size>1) */
569       lu->id.nz_loc = nnz;
570       lu->id.irn_loc=lu->irn; lu->id.jcn_loc=lu->jcn;
571       if (lu->id.ICNTL(6)>1) {
572 #if defined(PETSC_USE_COMPLEX)
573         lu->id.a_loc = (mumps_double_complex*)lu->val;
574 #else
575         lu->id.a_loc = lu->val;
576 #endif
577       }
578       break;
579     }
580     lu->id.job=1;
581 #if defined(PETSC_USE_COMPLEX)
582   zmumps_c(&lu->id);
583 #else
584   dmumps_c(&lu->id);
585 #endif
586     if (lu->id.INFOG(1) < 0) {
587       SETERRQ1(1,"Error reported by MUMPS in analysis phase: INFOG(1)=%d\n",lu->id.INFOG(1));
588     }
589   }
590 
591   /* numerical factorization phase */
592   if(lu->id.ICNTL(18) == 0) {
593     if (!lu->myid) {
594 #if defined(PETSC_USE_COMPLEX)
595       lu->id.a = (mumps_double_complex*)lu->val;
596 #else
597       lu->id.a = lu->val;
598 #endif
599     }
600   } else {
601 #if defined(PETSC_USE_COMPLEX)
602     lu->id.a_loc = (mumps_double_complex*)lu->val;
603 #else
604     lu->id.a_loc = lu->val;
605 #endif
606   }
607   lu->id.job=2;
608 #if defined(PETSC_USE_COMPLEX)
609   zmumps_c(&lu->id);
610 #else
611   dmumps_c(&lu->id);
612 #endif
613   if (lu->id.INFOG(1) < 0) {
614     SETERRQ2(1,"Error reported by MUMPS in numerical factorization phase: INFO(1)=%d, INFO(2)=%d\n",lu->id.INFO(1),lu->id.INFO(2));
615   }
616 
617   if (lu->myid==0 && lu->id.ICNTL(16) > 0){
618     SETERRQ1(1,"  lu->id.ICNTL(16):=%d\n",lu->id.INFOG(16));
619   }
620 
621   (*F)->assembled  = PETSC_TRUE;
622   lu->matstruc     = SAME_NONZERO_PATTERN;
623   lu->CleanUpMUMPS = PETSC_TRUE;
624   PetscFunctionReturn(0);
625 }
626 
627 /* Note the Petsc r and c permutations are ignored */
628 #undef __FUNCT__
629 #define __FUNCT__ "MatLUFactorSymbolic_AIJMUMPS"
630 int MatLUFactorSymbolic_AIJMUMPS(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) {
631   Mat       B;
632   Mat_MUMPS *lu;
633   int       ierr;
634 
635   PetscFunctionBegin;
636 
637   /* Create the factorization matrix */
638   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr);
639   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
640   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
641   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
642 
643   B->ops->lufactornumeric = MatFactorNumeric_AIJMUMPS;
644   B->factor               = FACTOR_LU;
645   lu                      = (Mat_MUMPS*)B->spptr;
646   lu->sym                 = 0;
647   lu->matstruc            = DIFFERENT_NONZERO_PATTERN;
648 
649   *F = B;
650   PetscFunctionReturn(0);
651 }
652 
653 /* Note the Petsc r permutation is ignored */
654 #undef __FUNCT__
655 #define __FUNCT__ "MatCholeskyFactorSymbolic_SBAIJMUMPS"
656 int MatCholeskyFactorSymbolic_SBAIJMUMPS(Mat A,IS r,MatFactorInfo *info,Mat *F) {
657   Mat       B;
658   Mat_MUMPS *lu;
659   int       ierr;
660 
661   PetscFunctionBegin;
662 
663   /* Create the factorization matrix */
664   ierr = MatCreate(A->comm,A->m,A->n,A->M,A->N,&B);CHKERRQ(ierr);
665   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
666   ierr = MatSeqSBAIJSetPreallocation(B,1,0,PETSC_NULL);CHKERRQ(ierr);
667   ierr = MatMPISBAIJSetPreallocation(B,1,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
668 
669   B->ops->choleskyfactornumeric = MatFactorNumeric_AIJMUMPS;
670   B->ops->getinertia            = MatGetInertia_SBAIJMUMPS;
671   B->factor                     = FACTOR_CHOLESKY;
672   lu                            = (Mat_MUMPS*)B->spptr;
673   lu->sym                       = 2;
674   lu->matstruc                  = DIFFERENT_NONZERO_PATTERN;
675 
676   *F = B;
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatAssemblyEnd_AIJMUMPS"
682 int MatAssemblyEnd_AIJMUMPS(Mat A,MatAssemblyType mode) {
683   int       ierr;
684   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
685 
686   PetscFunctionBegin;
687   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
688 
689   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
690   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
691   A->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
692   PetscFunctionReturn(0);
693 }
694 
695 EXTERN_C_BEGIN
696 #undef __FUNCT__
697 #define __FUNCT__ "MatConvert_AIJ_AIJMUMPS"
698 int MatConvert_AIJ_AIJMUMPS(Mat A,const MatType newtype,Mat *newmat) {
699   int       ierr,size;
700   MPI_Comm  comm;
701   Mat       B=*newmat;
702   Mat_MUMPS *mumps;
703 
704   PetscFunctionBegin;
705   if (B != A) {
706     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
707   }
708 
709   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
710   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
711 
712   mumps->MatDuplicate              = A->ops->duplicate;
713   mumps->MatView                   = A->ops->view;
714   mumps->MatAssemblyEnd            = A->ops->assemblyend;
715   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
716   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
717   mumps->MatDestroy                = A->ops->destroy;
718   mumps->specialdestroy            = MatDestroy_AIJMUMPS;
719   mumps->CleanUpMUMPS              = PETSC_FALSE;
720   mumps->isAIJ                     = PETSC_TRUE;
721 
722   B->spptr                         = (void *)mumps;
723   B->ops->duplicate                = MatDuplicate_AIJMUMPS;
724   B->ops->view                     = MatView_AIJMUMPS;
725   B->ops->assemblyend              = MatAssemblyEnd_AIJMUMPS;
726   B->ops->lufactorsymbolic         = MatLUFactorSymbolic_AIJMUMPS;
727   B->ops->destroy                  = MatDestroy_MUMPS;
728 
729   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
730   if (size == 1) {
731     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_aijmumps_C",
732                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
733     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_seqaij_C",
734                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
735   } else {
736     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_aijmumps_C",
737                                              "MatConvert_AIJ_AIJMUMPS",MatConvert_AIJ_AIJMUMPS);CHKERRQ(ierr);
738     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_aijmumps_mpiaij_C",
739                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
740   }
741 
742   PetscLogInfo(0,"Using MUMPS for LU factorization and solves.");
743   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
744   *newmat = B;
745   PetscFunctionReturn(0);
746 }
747 EXTERN_C_END
748 
749 #undef __FUNCT__
750 #define __FUNCT__ "MatDuplicate_AIJMUMPS"
751 int MatDuplicate_AIJMUMPS(Mat A, MatDuplicateOption op, Mat *M) {
752   int       ierr;
753   Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr;
754 
755   PetscFunctionBegin;
756   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
757   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
758   PetscFunctionReturn(0);
759 }
760 
761 /*MC
762   MATAIJMUMPS - MATAIJMUMPS = "aijmumps" - A matrix type providing direct solvers (LU) for distributed
763   and sequential matrices via the external package MUMPS.
764 
765   If MUMPS is installed (see the manual for instructions
766   on how to declare the existence of external packages),
767   a matrix type can be constructed which invokes MUMPS solvers.
768   After calling MatCreate(...,A), simply call MatSetType(A,MATAIJMUMPS).
769   This matrix type is only supported for double precision real.
770 
771   If created with a single process communicator, this matrix type inherits from MATSEQAIJ.
772   Otherwise, this matrix type inherits from MATMPIAIJ.  Hence for single process communicators,
773   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
774   for communicators controlling multiple processes.  It is recommended that you call both of
775   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
776   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
777   without data copy.
778 
779   Options Database Keys:
780 + -mat_type aijmumps - sets the matrix type to "aijmumps" during a call to MatSetFromOptions()
781 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
782 . -mat_mumps_icntl_4 <0,1,2,3,4> - print level
783 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
784 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
785 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
786 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
787 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
788 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
789 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
790 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
791 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
792 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
793 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
794 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
795 
796   Level: beginner
797 
798 .seealso: MATSBAIJMUMPS
799 M*/
800 
801 EXTERN_C_BEGIN
802 #undef __FUNCT__
803 #define __FUNCT__ "MatCreate_AIJMUMPS"
804 int MatCreate_AIJMUMPS(Mat A) {
805   int      ierr,size;
806   Mat      A_diag;
807   MPI_Comm comm;
808 
809   PetscFunctionBegin;
810   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
811   /*   and AIJMUMPS types */
812   ierr = PetscObjectChangeTypeName((PetscObject)A,MATAIJMUMPS);CHKERRQ(ierr);
813   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
814   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
815   if (size == 1) {
816     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
817   } else {
818     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
819     A_diag = ((Mat_MPIAIJ *)A->data)->A;
820     ierr   = MatConvert_AIJ_AIJMUMPS(A_diag,MATAIJMUMPS,&A_diag);CHKERRQ(ierr);
821   }
822   ierr = MatConvert_AIJ_AIJMUMPS(A,MATAIJMUMPS,&A);CHKERRQ(ierr);
823   PetscFunctionReturn(0);
824 }
825 EXTERN_C_END
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatAssemblyEnd_SBAIJMUMPS"
829 int MatAssemblyEnd_SBAIJMUMPS(Mat A,MatAssemblyType mode) {
830   int       ierr;
831   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
832 
833   PetscFunctionBegin;
834   ierr = (*mumps->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
835   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
836   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
837   A->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
838   PetscFunctionReturn(0);
839 }
840 
841 EXTERN_C_BEGIN
842 #undef __FUNCT__
843 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS"
844 int MatMPISBAIJSetPreallocation_MPISBAIJMUMPS(Mat  B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
845 {
846   Mat       A;
847   Mat_MUMPS *mumps=(Mat_MUMPS*)B->spptr;
848   int       ierr;
849 
850   PetscFunctionBegin;
851   /*
852     After performing the MPISBAIJ Preallocation, we need to convert the local diagonal block matrix
853     into MUMPS type so that the block jacobi preconditioner (for example) can use MUMPS.  I would
854     like this to be done in the MatCreate routine, but the creation of this inner matrix requires
855     block size info so that PETSc can determine the local size properly.  The block size info is set
856     in the preallocation routine.
857   */
858   ierr = (*mumps->MatPreallocate)(B,bs,d_nz,d_nnz,o_nz,o_nnz);
859   A    = ((Mat_MPISBAIJ *)B->data)->A;
860   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 EXTERN_C_END
864 
865 EXTERN_C_BEGIN
866 #undef __FUNCT__
867 #define __FUNCT__ "MatConvert_SBAIJ_SBAIJMUMPS"
868 int MatConvert_SBAIJ_SBAIJMUMPS(Mat A,const MatType newtype,Mat *newmat) {
869   int       ierr,size;
870   MPI_Comm  comm;
871   Mat       B=*newmat;
872   Mat_MUMPS *mumps=(Mat_MUMPS*)A->spptr;
873   void      (*f)(void);
874 
875   PetscFunctionBegin;
876   if (B != A) {
877     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
878   }
879 
880   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
881   ierr = PetscNew(Mat_MUMPS,&mumps);CHKERRQ(ierr);
882 
883   mumps->MatDuplicate              = A->ops->duplicate;
884   mumps->MatView                   = A->ops->view;
885   mumps->MatAssemblyEnd            = A->ops->assemblyend;
886   mumps->MatLUFactorSymbolic       = A->ops->lufactorsymbolic;
887   mumps->MatCholeskyFactorSymbolic = A->ops->choleskyfactorsymbolic;
888   mumps->MatDestroy                = A->ops->destroy;
889   mumps->specialdestroy            = MatDestroy_SBAIJMUMPS;
890   mumps->CleanUpMUMPS              = PETSC_FALSE;
891   mumps->isAIJ                     = PETSC_FALSE;
892 
893   B->spptr                         = (void *)mumps;
894   B->ops->duplicate                = MatDuplicate_SBAIJMUMPS;
895   B->ops->view                     = MatView_AIJMUMPS;
896   B->ops->assemblyend              = MatAssemblyEnd_SBAIJMUMPS;
897   B->ops->choleskyfactorsymbolic   = MatCholeskyFactorSymbolic_SBAIJMUMPS;
898   B->ops->destroy                  = MatDestroy_MUMPS;
899 
900   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
901   if (size == 1) {
902     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqsbaij_sbaijmumps_C",
903                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
904     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_seqsbaij_C",
905                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
906   } else {
907   /* I really don't like needing to know the tag: MatMPISBAIJSetPreallocation_C */
908     ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",&f);CHKERRQ(ierr);
909     if (f) {
910       mumps->MatPreallocate = (int (*)(Mat,int,int,int*,int,int*))f;
911       ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
912                                                "MatMPISBAIJSetPreallocation_MPISBAIJMUMPS",
913                                                MatMPISBAIJSetPreallocation_MPISBAIJMUMPS);CHKERRQ(ierr);
914     }
915     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_sbaijmumps_C",
916                                              "MatConvert_SBAIJ_SBAIJMUMPS",MatConvert_SBAIJ_SBAIJMUMPS);CHKERRQ(ierr);
917     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_sbaijmumps_mpisbaij_C",
918                                              "MatConvert_MUMPS_Base",MatConvert_MUMPS_Base);CHKERRQ(ierr);
919   }
920 
921   PetscLogInfo(0,"Using MUMPS for Cholesky factorization and solves.");
922   ierr = PetscObjectChangeTypeName((PetscObject)B,newtype);CHKERRQ(ierr);
923   *newmat = B;
924   PetscFunctionReturn(0);
925 }
926 EXTERN_C_END
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatDuplicate_SBAIJMUMPS"
930 int MatDuplicate_SBAIJMUMPS(Mat A, MatDuplicateOption op, Mat *M) {
931   int       ierr;
932   Mat_MUMPS *lu=(Mat_MUMPS *)A->spptr;
933 
934   PetscFunctionBegin;
935   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
936   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_MUMPS));CHKERRQ(ierr);
937   PetscFunctionReturn(0);
938 }
939 
940 /*MC
941   MATSBAIJMUMPS - MATSBAIJMUMPS = "sbaijmumps" - A symmetric matrix type providing direct solvers (Cholesky) for
942   distributed and sequential matrices via the external package MUMPS.
943 
944   If MUMPS is installed (see the manual for instructions
945   on how to declare the existence of external packages),
946   a matrix type can be constructed which invokes MUMPS solvers.
947   After calling MatCreate(...,A), simply call MatSetType(A,MATSBAIJMUMPS).
948   This matrix type is only supported for double precision real.
949 
950   If created with a single process communicator, this matrix type inherits from MATSEQSBAIJ.
951   Otherwise, this matrix type inherits from MATMPISBAIJ.  Hence for single process communicators,
952   MatSeqSBAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
953   for communicators controlling multiple processes.  It is recommended that you call both of
954   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
955   conversion to or from the MATSEQSBAIJ or MATMPISBAIJ type (depending on the communicator size)
956   without data copy.
957 
958   Options Database Keys:
959 + -mat_type sbaijmumps - sets the matrix type to "sbaijmumps" during a call to MatSetFromOptions()
960 . -mat_mumps_sym <0,1,2> - 0 the matrix is unsymmetric, 1 symmetric positive definite, 2 symmetric
961 . -mat_mumps_icntl_4 <0,...,4> - print level
962 . -mat_mumps_icntl_6 <0,...,7> - matrix prescaling options (see MUMPS User's Guide)
963 . -mat_mumps_icntl_7 <0,...,7> - matrix orderings (see MUMPS User's Guide)
964 . -mat_mumps_icntl_9 <1,2> - A or A^T x=b to be solved: 1 denotes A, 2 denotes A^T
965 . -mat_mumps_icntl_10 <n> - maximum number of iterative refinements
966 . -mat_mumps_icntl_11 <n> - error analysis, a positive value returns statistics during -ksp_view
967 . -mat_mumps_icntl_12 <n> - efficiency control (see MUMPS User's Guide)
968 . -mat_mumps_icntl_13 <n> - efficiency control (see MUMPS User's Guide)
969 . -mat_mumps_icntl_14 <n> - efficiency control (see MUMPS User's Guide)
970 . -mat_mumps_icntl_15 <n> - efficiency control (see MUMPS User's Guide)
971 . -mat_mumps_cntl_1 <delta> - relative pivoting threshold
972 . -mat_mumps_cntl_2 <tol> - stopping criterion for refinement
973 - -mat_mumps_cntl_3 <adelta> - absolute pivoting threshold
974 
975   Level: beginner
976 
977 .seealso: MATAIJMUMPS
978 M*/
979 
980 EXTERN_C_BEGIN
981 #undef __FUNCT__
982 #define __FUNCT__ "MatCreate_SBAIJMUMPS"
983 int MatCreate_SBAIJMUMPS(Mat A) {
984   int ierr,size;
985 
986   PetscFunctionBegin;
987   /* Change type name before calling MatSetType to force proper construction of SeqSBAIJ or MPISBAIJ */
988   /*   and SBAIJMUMPS types */
989   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJMUMPS);CHKERRQ(ierr);
990   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
991   if (size == 1) {
992     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
993   } else {
994     ierr   = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
995   }
996   ierr = MatConvert_SBAIJ_SBAIJMUMPS(A,MATSBAIJMUMPS,&A);CHKERRQ(ierr);
997   PetscFunctionReturn(0);
998 }
999 EXTERN_C_END
1000