xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 262ff7bbb8d19d69e3928431622c455081fa134e)
1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3         Provides an interface to the SuperLU_DIST_2.0 sparse solver
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14 #include "superlu_zdefs.h"
15 #else
16 #include "superlu_ddefs.h"
17 #endif
18 EXTERN_C_END
19 
20 typedef enum { GLOBAL,DISTRIBUTED
21 } SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   MatStructure            flg;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   /* A few function pointers for inheritance */
42   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
43   int (*MatView)(Mat,PetscViewer);
44   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
45   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
46   int (*MatDestroy)(Mat);
47 
48   /* Flag to clean up (non-global) SuperLU objects during Destroy */
49   PetscTruth CleanUpSuperLU_Dist;
50 } Mat_SuperLU_DIST;
51 
52 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
53 
54 EXTERN_C_BEGIN
55 #undef __FUNCT__
56 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
57 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) {
58   int              ierr;
59   Mat              B=*newmat;
60   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
61 
62   PetscFunctionBegin;
63   if (B != A) {
64     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
65   }
66   /* Reset the original function pointers */
67   B->ops->duplicate        = lu->MatDuplicate;
68   B->ops->view             = lu->MatView;
69   B->ops->assemblyend      = lu->MatAssemblyEnd;
70   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
71   B->ops->destroy          = lu->MatDestroy;
72 
73   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
74   ierr = PetscFree(lu);CHKERRQ(ierr);
75 
76   *newmat = B;
77   PetscFunctionReturn(0);
78 }
79 EXTERN_C_END
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
83 int MatDestroy_SuperLU_DIST(Mat A)
84 {
85   int              ierr,size;
86   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
87 
88   PetscFunctionBegin;
89   if (lu->CleanUpSuperLU_Dist) {
90     /* Deallocate SuperLU_DIST storage */
91     if (lu->MatInputMode == GLOBAL) {
92       Destroy_CompCol_Matrix_dist(&lu->A_sup);
93     } else {
94       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
95       if ( lu->options.SolveInitialized ) {
96 #if defined(PETSC_USE_COMPLEX)
97         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
98 #else
99         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
100 #endif
101       }
102     }
103     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
104     ScalePermstructFree(&lu->ScalePermstruct);
105     LUstructFree(&lu->LUstruct);
106 
107     /* Release the SuperLU_DIST process grid. */
108     superlu_gridexit(&lu->grid);
109 
110     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
111   }
112 
113   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
114   if (size == 1) {
115     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
116   } else {
117     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
118   }
119   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
120 
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSolve_SuperLU_DIST"
126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
127 {
128   Mat_MPIAIJ       *aa = (Mat_MPIAIJ*)A->data;
129   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
130   int              ierr, size;
131   int              m=A->M, N=A->N;
132   SuperLUStat_t    stat;
133   double           berr[1];
134   PetscScalar      *bptr;
135   int              info, nrhs=1;
136   Vec              x_seq;
137   IS               iden;
138   VecScatter       scat;
139 
140   PetscFunctionBegin;
141   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
142   if (size > 1) {
143     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
144       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
145       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
146       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
147       ierr = ISDestroy(iden);CHKERRQ(ierr);
148 
149       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
150       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
151       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
152     } else { /* distributed mat input */
153       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
154       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
155     }
156   } else { /* size == 1 */
157     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
158     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
159   }
160 
161   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
162 
163   PStatInit(&stat);        /* Initialize the statistics variables. */
164 
165   if (lu->MatInputMode == GLOBAL) {
166 #if defined(PETSC_USE_COMPLEX)
167     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
168                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
169 #else
170     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
171                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
172 #endif
173   } else { /* distributed mat input */
174 #if defined(PETSC_USE_COMPLEX)
175     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
176 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
177     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
178 #else
179     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
180 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
181     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
182 #endif
183   }
184   if (lu->StatPrint) {
185      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
186   }
187   PStatFree(&stat);
188 
189   if (size > 1) {
190     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
191       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
192       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
193       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
194       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
195       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
196     } else {
197       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
198     }
199   } else {
200     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
201   }
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
207 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
208 {
209   Mat_MPIAIJ       *fac = (Mat_MPIAIJ*)(*F)->data,*mat;
210   Mat              *tseq,A_seq = PETSC_NULL;
211   Mat_SeqAIJ       *aa,*bb;
212   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
213   int              M=A->M,N=A->N,info,ierr,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
214                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
215   SuperLUStat_t    stat;
216   double           *berr=0;
217   IS               isrow;
218   PetscLogDouble   time0,time,time_min,time_max;
219 #if defined(PETSC_USE_COMPLEX)
220   doublecomplex    *av, *bv;
221 #else
222   double           *av, *bv;
223 #endif
224 
225   PetscFunctionBegin;
226   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
227   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
228 
229   if (lu->StatPrint) { /* collect time for mat conversion */
230     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
231     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
232   }
233 
234   if (lu->MatInputMode == GLOBAL) { /* global mat input */
235     if (size > 1) { /* convert mpi A to seq mat A */
236       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr);
237       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr);
238       ierr = ISDestroy(isrow);CHKERRQ(ierr);
239 
240       A_seq = *tseq;
241       ierr = PetscFree(tseq);CHKERRQ(ierr);
242       aa =  (Mat_SeqAIJ*)A_seq->data;
243     } else {
244       aa =  (Mat_SeqAIJ*)A->data;
245     }
246 
247     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
248     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
249 #if defined(PETSC_USE_COMPLEX)
250       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
251 #else
252       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
253 #endif
254     } else { /* successive numeric factorization, sparsity pattern is reused. */
255       Destroy_CompCol_Matrix_dist(&lu->A_sup);
256       Destroy_LU(N, &lu->grid, &lu->LUstruct);
257       lu->options.Fact = SamePattern;
258     }
259 #if defined(PETSC_USE_COMPLEX)
260     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
261 #else
262     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
263 #endif
264 
265     /* Create compressed column matrix A_sup. */
266 #if defined(PETSC_USE_COMPLEX)
267     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
268 #else
269     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
270 #endif
271   } else { /* distributed mat input */
272     mat =  (Mat_MPIAIJ*)A->data;
273     aa=(Mat_SeqAIJ*)(mat->A)->data;
274     bb=(Mat_SeqAIJ*)(mat->B)->data;
275     ai=aa->i; aj=aa->j;
276     bi=bb->i; bj=bb->j;
277 #if defined(PETSC_USE_COMPLEX)
278     av=(doublecomplex*)aa->a;
279     bv=(doublecomplex*)bb->a;
280 #else
281     av=aa->a;
282     bv=bb->a;
283 #endif
284     rstart = mat->rstart;
285     nz     = aa->nz + bb->nz;
286     garray = mat->garray;
287     rstart = mat->rstart;
288 
289     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
290 #if defined(PETSC_USE_COMPLEX)
291       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
292 #else
293       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
294 #endif
295     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
296       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
297       Destroy_LU(N, &lu->grid, &lu->LUstruct);
298       lu->options.Fact = SamePattern;
299     }
300     nz = 0; irow = mat->rstart;
301     for ( i=0; i<m; i++ ) {
302       lu->row[i] = nz;
303       countA = ai[i+1] - ai[i];
304       countB = bi[i+1] - bi[i];
305       ajj = aj + ai[i];  /* ptr to the beginning of this row */
306       bjj = bj + bi[i];
307 
308       /* B part, smaller col index */
309       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
310       jB = 0;
311       for (j=0; j<countB; j++){
312         jcol = garray[bjj[j]];
313         if (jcol > colA_start) {
314           jB = j;
315           break;
316         }
317         lu->col[nz] = jcol;
318         lu->val[nz++] = *bv++;
319         if (j==countB-1) jB = countB;
320       }
321 
322       /* A part */
323       for (j=0; j<countA; j++){
324         lu->col[nz] = mat->rstart + ajj[j];
325         lu->val[nz++] = *av++;
326       }
327 
328       /* B part, larger col index */
329       for (j=jB; j<countB; j++){
330         lu->col[nz] = garray[bjj[j]];
331         lu->val[nz++] = *bv++;
332       }
333     }
334     lu->row[m] = nz;
335 #if defined(PETSC_USE_COMPLEX)
336     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
337 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
338 #else
339     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
340 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
341 #endif
342   }
343   if (lu->StatPrint) {
344     ierr = PetscGetTime(&time);CHKERRQ(ierr);
345     time0 = time - time0;
346   }
347 
348   /* Factor the matrix. */
349   PStatInit(&stat);   /* Initialize the statistics variables. */
350 
351   if (lu->MatInputMode == GLOBAL) { /* global mat input */
352 #if defined(PETSC_USE_COMPLEX)
353     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
354                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
355 #else
356     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
357                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
358 #endif
359   } else { /* distributed mat input */
360 #if defined(PETSC_USE_COMPLEX)
361     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
362 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
363     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
364 #else
365     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
366 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
367     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
368 #endif
369   }
370 
371   if (lu->MatInputMode == GLOBAL && size > 1){
372     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
373   }
374 
375   if (lu->StatPrint) {
376     if (size > 1){
377       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
378       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
379       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
380       time = time/size; /* average time */
381       if (!rank)
382         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
383                               %g / %g / %g\n",time_max,time_min,time);
384     } else {
385       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
386                               %g\n",time0);
387     }
388 
389     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
390   }
391   PStatFree(&stat);
392   (*F)->assembled = PETSC_TRUE;
393   lu->flg         = SAME_NONZERO_PATTERN;
394   PetscFunctionReturn(0);
395 }
396 
397 /* Note the Petsc r and c permutations are ignored */
398 #undef __FUNCT__
399 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
400 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
401 {
402   Mat               B;
403   Mat_SuperLU_DIST  *lu;
404   int               ierr,M=A->M,N=A->N,size;
405   superlu_options_t options;
406   char              buff[32];
407   PetscTruth        flg;
408   char              *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
409   char              *prtype[] = {"LargeDiag","NATURAL"};
410 
411   PetscFunctionBegin;
412   /* Create the factorization matrix */
413   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
414   ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr);
415   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
416   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
417 
418   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
419   B->ops->solve            = MatSolve_SuperLU_DIST;
420   B->factor                = FACTOR_LU;
421 
422   lu = (Mat_SuperLU_DIST*)(B->spptr);
423 
424   /* Set the input options */
425   set_default_options(&options);
426   lu->MatInputMode = GLOBAL;
427   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
428 
429   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
430   lu->nprow = size/2;               /* Default process rows.      */
431   if (lu->nprow == 0) lu->nprow = 1;
432   lu->npcol = size/lu->nprow;           /* Default process columns.   */
433 
434   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
435 
436     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
437     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
438     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
439 
440     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
441     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
442 
443     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
444     if (!flg) {
445       options.Equil = NO;
446     }
447 
448     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr);
449     while (flg) {
450       ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr);
451       if (flg) {
452         options.RowPerm = LargeDiag;
453         break;
454       }
455       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
456       if (flg) {
457         options.RowPerm = NOROWPERM;
458         break;
459       }
460       SETERRQ1(1,"Unknown row permutation %s",buff);
461     }
462 
463     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr);
464     while (flg) {
465       ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr);
466       if (flg) {
467         options.ColPerm = MMD_AT_PLUS_A;
468         break;
469       }
470       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
471       if (flg) {
472         options.ColPerm = NATURAL;
473         break;
474       }
475       ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr);
476       if (flg) {
477         options.ColPerm = MMD_ATA;
478         break;
479       }
480       ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr);
481       if (flg) {
482         options.ColPerm = COLAMD;
483         break;
484       }
485       SETERRQ1(1,"Unknown column permutation %s",buff);
486     }
487 
488     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
489     if (!flg) {
490       options.ReplaceTinyPivot = NO;
491     }
492 
493     options.IterRefine = NOREFINE;
494     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
495     if (flg) {
496       options.IterRefine = DOUBLE;
497     }
498 
499     if (PetscLogPrintInfo) {
500       lu->StatPrint = (int)PETSC_TRUE;
501     } else {
502       lu->StatPrint = (int)PETSC_FALSE;
503     }
504     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
505                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
506   PetscOptionsEnd();
507 
508   /* Initialize the SuperLU process grid. */
509   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
510 
511   /* Initialize ScalePermstruct and LUstruct. */
512   ScalePermstructInit(M, N, &lu->ScalePermstruct);
513   LUstructInit(M, N, &lu->LUstruct);
514 
515   lu->options            = options;
516   lu->flg                = DIFFERENT_NONZERO_PATTERN;
517   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
518   *F = B;
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
524 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
525   int              ierr;
526   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
527 
528   PetscFunctionBegin;
529   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
530   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
531   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
532   PetscFunctionReturn(0);
533 }
534 
535 #undef __FUNCT__
536 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
537 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
538 {
539   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
540   superlu_options_t options;
541   int               ierr;
542   char              *colperm;
543 
544   PetscFunctionBegin;
545   /* check if matrix is superlu_dist type */
546   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
547 
548   options = lu->options;
549   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
550   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
551   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
552   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
553   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
554   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
555   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
556   if (options.ColPerm == NATURAL) {
557     colperm = "NATURAL";
558   } else if (options.ColPerm == MMD_AT_PLUS_A) {
559     colperm = "MMD_AT_PLUS_A";
560   } else if (options.ColPerm == MMD_ATA) {
561     colperm = "MMD_ATA";
562   } else if (options.ColPerm == COLAMD) {
563     colperm = "COLAMD";
564   } else {
565     SETERRQ(1,"Unknown column permutation");
566   }
567   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatView_SuperLU_DIST"
573 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
574 {
575   int               ierr;
576   PetscTruth        isascii;
577   PetscViewerFormat format;
578   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
579 
580   PetscFunctionBegin;
581   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
582 
583   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
584   if (isascii) {
585     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
586     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
587       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
588     }
589   }
590   PetscFunctionReturn(0);
591 }
592 
593 
594 EXTERN_C_BEGIN
595 #undef __FUNCT__
596 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
597 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) {
598   /* This routine is only called to convert to MATSUPERLU_DIST */
599   /* from MATSEQAIJ if A has a single process communicator */
600   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
601   int              ierr, size;
602   MPI_Comm         comm;
603   Mat              B=*newmat;
604   Mat_SuperLU_DIST *lu;
605 
606   PetscFunctionBegin;
607   if (B != A) {
608     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
609   }
610 
611   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
612   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
613 
614   lu->MatDuplicate         = A->ops->duplicate;
615   lu->MatView              = A->ops->view;
616   lu->MatAssemblyEnd       = A->ops->assemblyend;
617   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
618   lu->MatDestroy           = A->ops->destroy;
619   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
620 
621   B->spptr                 = (void*)lu;
622   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
623   B->ops->view             = MatView_SuperLU_DIST;
624   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
625   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
626   B->ops->destroy          = MatDestroy_SuperLU_DIST;
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
628   if (size == 1) {
629     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
630                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
631     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
632                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
633   } else {
634     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
635                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
636     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
637                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
638   }
639   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
640   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
641   *newmat = B;
642   PetscFunctionReturn(0);
643 }
644 EXTERN_C_END
645 
646 #undef __FUNCT__
647 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
648 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
649   int              ierr;
650   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
651 
652   PetscFunctionBegin;
653   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
654   ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr);
655   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
656   PetscFunctionReturn(0);
657 }
658 
659 /*MC
660   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
661   via the external package SuperLU_DIST.
662 
663   If SuperLU_DIST is installed (see the manual for
664   instructions on how to declare the existence of external packages),
665   a matrix type can be constructed which invokes SuperLU_DIST solvers.
666   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
667   This matrix type is only supported for double precision real.
668 
669   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
670   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
671   MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
672   for communicators controlling multiple processes.  It is recommended that you call both of
673   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
674   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
675   without data copy.
676 
677   Options Database Keys:
678 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
679 . -mat_superlu_dist_r <n> - number of rows in processor partition
680 . -mat_superlu_dist_c <n> - number of columns in processor partition
681 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
682 . -mat_superlu_dist_equil - equilibrate the matrix
683 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
684 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
685 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
686 . -mat_superlu_dist_iterrefine - use iterative refinement
687 - -mat_superlu_dist_statprint - print factorization information
688 
689    Level: beginner
690 
691 .seealso: PCLU
692 M*/
693 
694 EXTERN_C_BEGIN
695 #undef __FUNCT__
696 #define __FUNCT__ "MatCreate_SuperLU_DIST"
697 int MatCreate_SuperLU_DIST(Mat A) {
698   int ierr,size;
699 
700   PetscFunctionBegin;
701   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
702   /*   and SuperLU_DIST types */
703   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
704   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
705   if (size == 1) {
706     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
707   } else {
708     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
709   }
710   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 EXTERN_C_END
714 
715