xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 0dd8e64f52f68e7435a7bfccdbe0fad5e7b4f3f7)
1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3         Provides an interface to the SuperLU_DIST_2.0 sparse solver
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 /*MC
13   MATSUPERLU_DIST - a matrix type providing direct solvers for parallel matrices
14   via the external package SuperLU_DIST.
15 
16   If SuperLU_DIST is installed (see the manual for
17   instructions on how to declare the existence of external packages),
18   a matrix type can be constructed which invokes SuperLU_DIST solvers.
19   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
20   This matrix type is only supported for double precision real.
21 
22   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
23   and from MATMPIAIJ otherwise.
24 
25   Options Database Keys:
26 + -mat_type superlu_dist
27 . -mat_superlu_dist_r <n> : number of rows in processor partition
28 . -mat_superlu_dist_c <n> : number of columns in processor partition
29 . -mat_superlu_dist_matinput 0|1 : matrix input mode; 0=global, 1=distributed
30 . -mat_superlu_dist_equil :, equilibrate the matrix
31 . -mat_superlu_dist_rowperm LargeDiag|NATURAL : row permutation
32 . -mat_superlu_dist_colperm MMD_AT_PLUS_A|MMD_ATA|COLAMD|NATURAL : column permutation
33 . -mat_superlu_dist_replacetinypivot : replace tiny pivots
34 . -mat_superlu_dist_iterrefine : use iterative refinement
35 - -mat_superlu_dist_statprint : print factorization information
36 
37 .seealso: PCLU
38 M*/
39 
40 EXTERN_C_BEGIN
41 #if defined(PETSC_USE_COMPLEX)
42 #include "superlu_zdefs.h"
43 #else
44 #include "superlu_ddefs.h"
45 #endif
46 EXTERN_C_END
47 
48 typedef enum { GLOBAL,DISTRIBUTED
49 } SuperLU_MatInputMode;
50 
51 typedef struct {
52   int_t                   nprow,npcol,*row,*col;
53   gridinfo_t              grid;
54   superlu_options_t       options;
55   SuperMatrix             A_sup;
56   ScalePermstruct_t       ScalePermstruct;
57   LUstruct_t              LUstruct;
58   int                     StatPrint;
59   int                     MatInputMode;
60   SOLVEstruct_t           SOLVEstruct;
61   MatStructure            flg;
62   MPI_Comm                comm_superlu;
63 #if defined(PETSC_USE_COMPLEX)
64   doublecomplex           *val;
65 #else
66   double                  *val;
67 #endif
68 
69   /* A few function pointers for inheritance */
70   int (*MatView)(Mat,PetscViewer);
71   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
72   int (*MatDestroy)(Mat);
73 
74   /* Flag to clean up (non-global) SuperLU objects during Destroy */
75   PetscTruth CleanUpSuperLUDist;
76 } Mat_MPIAIJ_SuperLU_DIST;
77 
78 #undef __FUNCT__
79 #define __FUNCT__ "MatDestroy_MPIAIJ_SuperLU_DIST"
80 int MatDestroy_MPIAIJ_SuperLU_DIST(Mat A)
81 {
82   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
83   int                     ierr,(*destroy)(Mat);
84 
85   PetscFunctionBegin;
86   if (lu->CleanUpSuperLUDist) {
87     /* Deallocate SuperLU_DIST storage */
88     if (lu->MatInputMode == GLOBAL) {
89       Destroy_CompCol_Matrix_dist(&lu->A_sup);
90     } else {
91       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
92       if ( lu->options.SolveInitialized ) {
93 #if defined(PETSC_USE_COMPLEX)
94         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
95 #else
96         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
97 #endif
98       }
99     }
100     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
101     ScalePermstructFree(&lu->ScalePermstruct);
102     LUstructFree(&lu->LUstruct);
103 
104     /* Release the SuperLU_DIST process grid. */
105     superlu_gridexit(&lu->grid);
106 
107     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
108   }
109   destroy = lu->MatDestroy;
110   ierr = PetscFree(lu);CHKERRQ(ierr);
111   ierr = (*destroy)(A);CHKERRQ(ierr);
112 
113   PetscFunctionReturn(0);
114 }
115 
116 extern int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer);
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatView_MPIAIJ_Spooles_DIST"
120 int MatView_MPIAIJ_SuperLU_DIST(Mat A,PetscViewer viewer)
121 {
122   int                     ierr;
123   PetscTruth              isascii;
124   PetscViewerFormat       format;
125   Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr);
126 
127   PetscFunctionBegin;
128   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
129 
130   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
131   if (isascii) {
132     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
133     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
134       ierr = MatMPIAIJFactorInfo_SuperLu(A,viewer);CHKERRQ(ierr);
135     }
136   }
137   PetscFunctionReturn(0);
138 }
139 
140 #undef __FUNCT__
141 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ_SuperLU_DIST"
142 int MatAssemblyEnd_MPIAIJ_SuperLU_DIST(Mat A,MatAssemblyType mode) {
143   int                     ierr;
144   Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr);
145 
146   PetscFunctionBegin;
147   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
148   ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 #undef __FUNCT__
153 #define __FUNCT__ "MatSolve_MPIAIJ_SuperLU_DIST"
154 int MatSolve_MPIAIJ_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
155 {
156   Mat_MPIAIJ              *aa = (Mat_MPIAIJ*)A->data;
157   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
158   int                     ierr, size=aa->size;
159   int                     m=A->M, N=A->N;
160   SuperLUStat_t           stat;
161   double                  berr[1];
162   PetscScalar             *bptr;
163   int                     info, nrhs=1;
164   Vec                     x_seq;
165   IS                      iden;
166   VecScatter              scat;
167   PetscLogDouble          time0,time,time_min,time_max;
168 
169   PetscFunctionBegin;
170   if (size > 1) {
171     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
172       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
173       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
174       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
175       ierr = ISDestroy(iden);CHKERRQ(ierr);
176 
177       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
178       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
179       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
180     } else { /* distributed mat input */
181       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
182       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
183     }
184   } else { /* size == 1 */
185     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
186     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
187   }
188 
189   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
190 
191   PStatInit(&stat);        /* Initialize the statistics variables. */
192   if (lu->StatPrint) {
193     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */
194     ierr = PetscGetTime(&time0);CHKERRQ(ierr);  /* to be removed */
195   }
196   if (lu->MatInputMode == GLOBAL) {
197 #if defined(PETSC_USE_COMPLEX)
198     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
199                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
200 #else
201     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
202                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
203 #endif
204   } else { /* distributed mat input */
205 #if defined(PETSC_USE_COMPLEX)
206     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
207 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
208     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
209 #else
210     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
211 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
212     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
213 #endif
214   }
215   if (lu->StatPrint) {
216     ierr = PetscGetTime(&time);CHKERRQ(ierr);  /* to be removed */
217      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
218   }
219   PStatFree(&stat);
220 
221   if (size > 1) {
222     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
223       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
224       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
225       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
226       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
227       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
228     } else {
229       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
230     }
231   } else {
232     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
233   }
234   if (lu->StatPrint) {
235     time0 = time - time0;
236     ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr);
237     ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr);
238     ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr);
239     time = time/size; /* average time */
240     ierr = PetscPrintf(A->comm, "  Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr);
241   }
242   PetscFunctionReturn(0);
243 }
244 
245 #undef __FUNCT__
246 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ_SuperLU_DIST"
247 int MatLUFactorNumeric_MPIAIJ_SuperLU_DIST(Mat A,Mat *F)
248 {
249   Mat_MPIAIJ              *fac = (Mat_MPIAIJ*)(*F)->data,*mat;
250   Mat                     *tseq,A_seq = PETSC_NULL;
251   Mat_SeqAIJ              *aa,*bb;
252   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)(*F)->spptr;
253   int                     M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
254                           m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
255   SuperLUStat_t           stat;
256   double                  *berr=0;
257   IS                      isrow;
258   PetscLogDouble          time0[2],time[2],time_min[2],time_max[2];
259 #if defined(PETSC_USE_COMPLEX)
260   doublecomplex           *av, *bv;
261 #else
262   double                  *av, *bv;
263 #endif
264 
265   PetscFunctionBegin;
266   if (lu->StatPrint) {
267     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
268     ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr);
269   }
270 
271   if (lu->MatInputMode == GLOBAL) { /* global mat input */
272     if (size > 1) { /* convert mpi A to seq mat A */
273       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr);
274       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr);
275       ierr = ISDestroy(isrow);CHKERRQ(ierr);
276 
277       A_seq = *tseq;
278       ierr = PetscFree(tseq);CHKERRQ(ierr);
279       aa =  (Mat_SeqAIJ*)A_seq->data;
280     } else {
281       aa =  (Mat_SeqAIJ*)A->data;
282     }
283 
284     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
285     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
286 #if defined(PETSC_USE_COMPLEX)
287       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
288 #else
289       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
290 #endif
291     } else { /* successive numeric factorization, sparsity pattern is reused. */
292       Destroy_CompCol_Matrix_dist(&lu->A_sup);
293       Destroy_LU(N, &lu->grid, &lu->LUstruct);
294       lu->options.Fact = SamePattern;
295     }
296 #if defined(PETSC_USE_COMPLEX)
297     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
298 #else
299     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
300 #endif
301 
302     /* Create compressed column matrix A_sup. */
303 #if defined(PETSC_USE_COMPLEX)
304     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
305 #else
306     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
307 #endif
308   } else { /* distributed mat input */
309     mat =  (Mat_MPIAIJ*)A->data;
310     aa=(Mat_SeqAIJ*)(mat->A)->data;
311     bb=(Mat_SeqAIJ*)(mat->B)->data;
312     ai=aa->i; aj=aa->j;
313     bi=bb->i; bj=bb->j;
314 #if defined(PETSC_USE_COMPLEX)
315     av=(doublecomplex*)aa->a;
316     bv=(doublecomplex*)bb->a;
317 #else
318     av=aa->a;
319     bv=bb->a;
320 #endif
321     rstart = mat->rstart;
322     nz     = aa->nz + bb->nz;
323     garray = mat->garray;
324     rstart = mat->rstart;
325 
326     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
327 #if defined(PETSC_USE_COMPLEX)
328       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
329 #else
330       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
331 #endif
332     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
333       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
334       Destroy_LU(N, &lu->grid, &lu->LUstruct);
335       lu->options.Fact = SamePattern;
336     }
337     nz = 0; jB = 0; irow = mat->rstart;
338     for ( i=0; i<m; i++ ) {
339       lu->row[i] = nz;
340       countA = ai[i+1] - ai[i];
341       countB = bi[i+1] - bi[i];
342       ajj = aj + ai[i];  /* ptr to the beginning of this row */
343       bjj = bj + bi[i];
344 
345       /* B part, smaller col index */
346       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
347       for (j=0; j<countB; j++){
348         jcol = garray[bjj[j]];
349         if (jcol > colA_start) {
350           jB = j;
351           break;
352         }
353         lu->col[nz] = jcol;
354         lu->val[nz++] = *bv++;
355         if (j==countB-1) jB = countB;
356       }
357 
358       /* A part */
359       for (j=0; j<countA; j++){
360         lu->col[nz] = mat->rstart + ajj[j];
361         lu->val[nz++] = *av++;
362       }
363 
364       /* B part, larger col index */
365       for (j=jB; j<countB; j++){
366         lu->col[nz] = garray[bjj[j]];
367         lu->val[nz++] = *bv++;
368       }
369     }
370     lu->row[m] = nz;
371 #if defined(PETSC_USE_COMPLEX)
372     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
373 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
374 #else
375     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
376 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
377 #endif
378   }
379   if (lu->StatPrint) {
380     ierr = PetscGetTime(&time[0]);CHKERRQ(ierr);
381     time0[0] = time[0] - time0[0];
382   }
383 
384   /* Factor the matrix. */
385   PStatInit(&stat);   /* Initialize the statistics variables. */
386 
387   if (lu->StatPrint) {
388     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
389     ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr);
390   }
391 
392   if (lu->MatInputMode == GLOBAL) { /* global mat input */
393 #if defined(PETSC_USE_COMPLEX)
394     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
395                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
396 #else
397     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
398                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
399 #endif
400   } else { /* distributed mat input */
401 #if defined(PETSC_USE_COMPLEX)
402     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
403 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
404     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
405 #else
406     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
407 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
408     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
409 #endif
410   }
411   if (lu->StatPrint) {
412     ierr = PetscGetTime(&time[1]);CHKERRQ(ierr);  /* to be removed */
413     time0[1] = time[1] - time0[1];
414     if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
415   }
416   PStatFree(&stat);
417 
418   if (lu->MatInputMode == GLOBAL && size > 1){
419     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
420   }
421 
422   if (lu->StatPrint) {
423     ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
424     ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
425     ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
426     for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
427     ierr = PetscPrintf(A->comm, "  Time for mat conversion (max/min/avg):    %g / %g / %g\n",time_max[0],time_min[0],time[0]);
428     ierr = PetscPrintf(A->comm, "  Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]);
429   }
430   (*F)->assembled = PETSC_TRUE;
431   lu->flg         = SAME_NONZERO_PATTERN;
432   PetscFunctionReturn(0);
433 }
434 
435 /* Note the Petsc r and c permutations are ignored */
436 #undef __FUNCT__
437 #define __FUNCT__ "MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST"
438 int MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
439 {
440   Mat                     B;
441   Mat_MPIAIJ_SuperLU_DIST *lu;
442   int                     ierr,M=A->M,N=A->N,size;
443   superlu_options_t       options;
444   char                    buff[32];
445   PetscTruth              flg;
446   char                    *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
447   char                    *prtype[] = {"LargeDiag","NATURAL"};
448   PetscFunctionBegin;
449 
450   /* Create the factorization matrix */
451   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
452   ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr);
453   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
454   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
455 
456   B->ops->solve            = MatSolve_MPIAIJ_SuperLU_DIST;
457   B->factor                = FACTOR_LU;
458 
459   lu = (Mat_MPIAIJ_SuperLU_DIST*)(B->spptr);
460 
461   /* Set the input options */
462   set_default_options(&options);
463   lu->MatInputMode = GLOBAL;
464   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
465 
466   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
467   lu->nprow = size/2;               /* Default process rows.      */
468   if (lu->nprow == 0) lu->nprow = 1;
469   lu->npcol = size/lu->nprow;           /* Default process columns.   */
470 
471   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
472 
473     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
474     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
475     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
476 
477     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
478     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
479 
480     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
481     if (!flg) {
482       options.Equil = NO;
483     }
484 
485     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr);
486     while (flg) {
487       ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr);
488       if (flg) {
489         options.RowPerm = LargeDiag;
490         break;
491       }
492       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
493       if (flg) {
494         options.RowPerm = NOROWPERM;
495         break;
496       }
497       SETERRQ1(1,"Unknown row permutation %s",buff);
498     }
499 
500     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr);
501     while (flg) {
502       ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr);
503       if (flg) {
504         options.ColPerm = MMD_AT_PLUS_A;
505         break;
506       }
507       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
508       if (flg) {
509         options.ColPerm = NATURAL;
510         break;
511       }
512       ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr);
513       if (flg) {
514         options.ColPerm = MMD_ATA;
515         break;
516       }
517       ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr);
518       if (flg) {
519         options.ColPerm = COLAMD;
520         break;
521       }
522       SETERRQ1(1,"Unknown column permutation %s",buff);
523     }
524 
525     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
526     if (!flg) {
527       options.ReplaceTinyPivot = NO;
528     }
529 
530     options.IterRefine = NOREFINE;
531     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
532     if (flg) {
533       options.IterRefine = DOUBLE;
534     }
535 
536     if (PetscLogPrintInfo) {
537       lu->StatPrint = (int)PETSC_TRUE;
538     } else {
539       lu->StatPrint = (int)PETSC_FALSE;
540     }
541     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
542                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
543   PetscOptionsEnd();
544 
545   /* Initialize the SuperLU process grid. */
546   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
547 
548   /* Initialize ScalePermstruct and LUstruct. */
549   ScalePermstructInit(M, N, &lu->ScalePermstruct);
550   LUstructInit(M, N, &lu->LUstruct);
551 
552   lu->options            = options;
553   lu->flg                = DIFFERENT_NONZERO_PATTERN;
554   lu->CleanUpSuperLUDist = PETSC_TRUE;
555   *F = B;
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatUseSuperLU_DIST_MPIAIJ"
561 int MatUseSuperLU_DIST_MPIAIJ(Mat A)
562 {
563   PetscFunctionBegin;
564   A->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST;
565   A->ops->lufactornumeric  = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatMPIAIJFactorInfo_SuperLu"
571 int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer)
572 {
573   Mat_MPIAIJ_SuperLU_DIST *lu= (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
574   superlu_options_t       options;
575   int                     ierr;
576   char                    *colperm;
577 
578   PetscFunctionBegin;
579   /* check if matrix is superlu_dist type */
580   if (A->ops->solve != MatSolve_MPIAIJ_SuperLU_DIST) PetscFunctionReturn(0);
581 
582   options = lu->options;
583   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
584   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
585   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
586   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
587   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
588   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
589   if (options.ColPerm == NATURAL) {
590     colperm = "NATURAL";
591   } else if (options.ColPerm == MMD_AT_PLUS_A) {
592     colperm = "MMD_AT_PLUS_A";
593   } else if (options.ColPerm == MMD_ATA) {
594     colperm = "MMD_ATA";
595   } else if (options.ColPerm == COLAMD) {
596     colperm = "COLAMD";
597   } else {
598     SETERRQ(1,"Unknown column permutation");
599   }
600   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
601   PetscFunctionReturn(0);
602 }
603 
604 EXTERN_C_BEGIN
605 #undef __FUNCT__
606 #define __FUNCT__ "MatCreate_MPIAIJ_SuperLU_DIST"
607 int MatCreate_MPIAIJ_SuperLU_DIST(Mat A) {
608   int                     ierr,size;
609   MPI_Comm                comm;
610   Mat_MPIAIJ_SuperLU_DIST *lu;
611 
612   PetscFunctionBegin;
613   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
614   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
615   if (size == 1) {
616     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
617   } else {
618     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
619   }
620   ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr);
621 
622   ierr                   = PetscNew(Mat_MPIAIJ_SuperLU_DIST,&lu);CHKERRQ(ierr);
623   lu->MatView            = A->ops->view;
624   lu->MatAssemblyEnd     = A->ops->assemblyend;
625   lu->MatDestroy         = A->ops->destroy;
626   lu->CleanUpSuperLUDist = PETSC_FALSE;
627   A->spptr               = (void*)lu;
628   A->ops->view           = MatView_MPIAIJ_SuperLU_DIST;
629   A->ops->assemblyend    = MatAssemblyEnd_MPIAIJ_SuperLU_DIST;
630   A->ops->destroy        = MatDestroy_MPIAIJ_SuperLU_DIST;
631   PetscFunctionReturn(0);
632 }
633 EXTERN_C_END
634 
635 EXTERN_C_BEGIN
636 #undef __FUNCT__
637 #define __FUNCT__ "MatLoad_MPIAIJ_SuperLU_DIST"
638 int MatLoad_MPIAIJ_SuperLU_DIST(PetscViewer viewer,MatType type,Mat *A) {
639   int      ierr,size,(*r)(PetscViewer,MatType,Mat*);
640   MPI_Comm comm;
641 
642   PetscFunctionBegin;
643   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
644   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
645   if (size == 1) {
646     ierr = PetscFListFind(comm,MatLoadList,MATSEQAIJ,(void(**)(void))&r);CHKERRQ(ierr);
647   } else {
648     ierr = PetscFListFind(comm,MatLoadList,MATMPIAIJ,(void(**)(void))&r);CHKERRQ(ierr);
649   }
650   ierr = (*r)(viewer,type,A);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 EXTERN_C_END
654