xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision d54de34fa6b60aff6e21b0c3e68569597cc8f0ee)
1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3         Provides an interface to the SuperLU_DIST_2.0 sparse solver
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 /*MC
13   MATSUPERLU_DIST - a matrix type providing direct solvers for parallel matrices
14   via the external package SuperLU_DIST.
15 
16   If SuperLU_DIST is installed (see the manual for
17   instructions on how to declare the existence of external packages),
18   a matrix type can be constructed which invokes SuperLU_DIST solvers.
19   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
20   This matrix type is only supported for double precision real.
21 
22   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
23   and from MATMPIAIJ otherwise.
24 
25   Options Database Keys:
26 + -mat_type superlu_dist
27 . -mat_superlu_dist_r <n> : number of rows in processor partition
28 . -mat_superlu_dist_c <n> : number of columns in processor partition
29 . -mat_superlu_dist_matinput 0|1 : matrix input mode; 0=global, 1=distributed
30 . -mat_superlu_dist_equil :, equilibrate the matrix
31 . -mat_superlu_dist_rowperm LargeDiag|NATURAL : row permutation
32 . -mat_superlu_dist_colperm MMD_AT_PLUS_A|MMD_ATA|COLAMD|NATURAL : column permutation
33 . -mat_superlu_dist_replacetinypivot : replace tiny pivots
34 . -mat_superlu_dist_iterrefine : use iterative refinement
35 - -mat_superlu_dist_statprint : print factorization information
36 
37 .seealso: PCLU
38 M*/
39 
40 EXTERN_C_BEGIN
41 #if defined(PETSC_USE_COMPLEX)
42 #include "superlu_zdefs.h"
43 #else
44 #include "superlu_ddefs.h"
45 #endif
46 EXTERN_C_END
47 
48 typedef enum { GLOBAL,DISTRIBUTED
49 } SuperLU_MatInputMode;
50 
51 typedef struct {
52   int_t                   nprow,npcol,*row,*col;
53   gridinfo_t              grid;
54   superlu_options_t       options;
55   SuperMatrix             A_sup;
56   ScalePermstruct_t       ScalePermstruct;
57   LUstruct_t              LUstruct;
58   int                     StatPrint;
59   int                     MatInputMode;
60   SOLVEstruct_t           SOLVEstruct;
61   MatStructure            flg;
62   MPI_Comm                comm_superlu;
63 #if defined(PETSC_USE_COMPLEX)
64   doublecomplex           *val;
65 #else
66   double                  *val;
67 #endif
68 
69   /* A few function pointers for inheritance */
70   int (*MatView)(Mat,PetscViewer);
71   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
72   int (*MatDestroy)(Mat);
73 
74   /* Flag to clean up (non-global) SuperLU objects during Destroy */
75   PetscTruth CleanUpSuperLUDist;
76 } Mat_MPIAIJ_SuperLU_DIST;
77 
78 #undef __FUNCT__
79 #define __FUNCT__ "MatDestroy_MPIAIJ_SuperLU_DIST"
80 int MatDestroy_MPIAIJ_SuperLU_DIST(Mat A)
81 {
82   Mat_MPIAIJ              *a  = (Mat_MPIAIJ*)A->data;
83   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
84   int                     ierr,(*destroy)(Mat);
85 
86   PetscFunctionBegin;
87   if (lu->CleanUpSuperLUDist) {
88     /* Deallocate SuperLU_DIST storage */
89     if (lu->MatInputMode == GLOBAL) {
90       Destroy_CompCol_Matrix_dist(&lu->A_sup);
91     } else {
92       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
93       if ( lu->options.SolveInitialized ) {
94 #if defined(PETSC_USE_COMPLEX)
95         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
96 #else
97         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
98 #endif
99       }
100     }
101     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
102     ScalePermstructFree(&lu->ScalePermstruct);
103     LUstructFree(&lu->LUstruct);
104 
105     /* Release the SuperLU_DIST process grid. */
106     superlu_gridexit(&lu->grid);
107 
108     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
109   }
110   destroy = lu->MatDestroy;
111   ierr = PetscFree(lu);CHKERRQ(ierr);
112   ierr = (*destroy)(A);CHKERRQ(ierr);
113 
114   PetscFunctionReturn(0);
115 }
116 
117 extern int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer);
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "MatView_MPIAIJ_Spooles_DIST"
121 int MatView_MPIAIJ_SuperLU_DIST(Mat A,PetscViewer viewer)
122 {
123   int                     ierr;
124   PetscTruth              isascii;
125   PetscViewerFormat       format;
126   Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr);
127 
128   PetscFunctionBegin;
129   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
130 
131   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
132   if (isascii) {
133     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
134     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
135       ierr = MatMPIAIJFactorInfo_SuperLu(A,viewer);CHKERRQ(ierr);
136     }
137   }
138   PetscFunctionReturn(0);
139 }
140 
141 #undef __FUNCT__
142 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ_SuperLU_DIST"
143 int MatAssemblyEnd_MPIAIJ_SuperLU_DIST(Mat A,MatAssemblyType mode) {
144   int                     ierr;
145   Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr);
146 
147   PetscFunctionBegin;
148   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
149   ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr);
150   PetscFunctionReturn(0);
151 }
152 
153 #undef __FUNCT__
154 #define __FUNCT__ "MatSolve_MPIAIJ_SuperLU_DIST"
155 int MatSolve_MPIAIJ_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
156 {
157   Mat_MPIAIJ              *aa = (Mat_MPIAIJ*)A->data;
158   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
159   int                     ierr, size=aa->size;
160   int                     m=A->M, N=A->N;
161   SuperLUStat_t           stat;
162   double                  berr[1];
163   PetscScalar             *bptr;
164   int                     info, nrhs=1;
165   Vec                     x_seq;
166   IS                      iden;
167   VecScatter              scat;
168   PetscLogDouble          time0,time,time_min,time_max;
169 
170   PetscFunctionBegin;
171   if (size > 1) {
172     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
173       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
174       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
175       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
176       ierr = ISDestroy(iden);CHKERRQ(ierr);
177 
178       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
179       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
180       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
181     } else { /* distributed mat input */
182       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
183       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
184     }
185   } else { /* size == 1 */
186     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
187     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
188   }
189 
190   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
191 
192   PStatInit(&stat);        /* Initialize the statistics variables. */
193   if (lu->StatPrint) {
194     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */
195     ierr = PetscGetTime(&time0);CHKERRQ(ierr);  /* to be removed */
196   }
197   if (lu->MatInputMode == GLOBAL) {
198 #if defined(PETSC_USE_COMPLEX)
199     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
200                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
201 #else
202     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
203                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
204 #endif
205   } else { /* distributed mat input */
206 #if defined(PETSC_USE_COMPLEX)
207     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
208 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
209     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
210 #else
211     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
212 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
213     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
214 #endif
215   }
216   if (lu->StatPrint) {
217     ierr = PetscGetTime(&time);CHKERRQ(ierr);  /* to be removed */
218      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
219   }
220   PStatFree(&stat);
221 
222   if (size > 1) {
223     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
224       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
225       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
226       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
227       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
228       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
229     } else {
230       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
231     }
232   } else {
233     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
234   }
235   if (lu->StatPrint) {
236     time0 = time - time0;
237     ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr);
238     ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr);
239     ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr);
240     time = time/size; /* average time */
241     ierr = PetscPrintf(A->comm, "  Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr);
242   }
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ_SuperLU_DIST"
248 int MatLUFactorNumeric_MPIAIJ_SuperLU_DIST(Mat A,Mat *F)
249 {
250   Mat_MPIAIJ              *fac = (Mat_MPIAIJ*)(*F)->data,*mat;
251   Mat                     *tseq,A_seq = PETSC_NULL;
252   Mat_SeqAIJ              *aa,*bb;
253   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)(*F)->spptr;
254   int                     M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
255                           m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
256   SuperLUStat_t           stat;
257   double                  *berr=0;
258   IS                      isrow;
259   PetscLogDouble          time0[2],time[2],time_min[2],time_max[2];
260 #if defined(PETSC_USE_COMPLEX)
261   doublecomplex           *av, *bv;
262 #else
263   double                  *av, *bv;
264 #endif
265 
266   PetscFunctionBegin;
267   if (lu->StatPrint) {
268     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
269     ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr);
270   }
271 
272   if (lu->MatInputMode == GLOBAL) { /* global mat input */
273     if (size > 1) { /* convert mpi A to seq mat A */
274       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr);
275       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr);
276       ierr = ISDestroy(isrow);CHKERRQ(ierr);
277 
278       A_seq = *tseq;
279       ierr = PetscFree(tseq);CHKERRQ(ierr);
280       aa =  (Mat_SeqAIJ*)A_seq->data;
281     } else {
282       aa =  (Mat_SeqAIJ*)A->data;
283     }
284 
285     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
286     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
287 #if defined(PETSC_USE_COMPLEX)
288       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
289 #else
290       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
291 #endif
292     } else { /* successive numeric factorization, sparsity pattern is reused. */
293       Destroy_CompCol_Matrix_dist(&lu->A_sup);
294       Destroy_LU(N, &lu->grid, &lu->LUstruct);
295       lu->options.Fact = SamePattern;
296     }
297 #if defined(PETSC_USE_COMPLEX)
298     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
299 #else
300     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
301 #endif
302 
303     /* Create compressed column matrix A_sup. */
304 #if defined(PETSC_USE_COMPLEX)
305     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
306 #else
307     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
308 #endif
309   } else { /* distributed mat input */
310     mat =  (Mat_MPIAIJ*)A->data;
311     aa=(Mat_SeqAIJ*)(mat->A)->data;
312     bb=(Mat_SeqAIJ*)(mat->B)->data;
313     ai=aa->i; aj=aa->j;
314     bi=bb->i; bj=bb->j;
315 #if defined(PETSC_USE_COMPLEX)
316     av=(doublecomplex*)aa->a;
317     bv=(doublecomplex*)bb->a;
318 #else
319     av=aa->a;
320     bv=bb->a;
321 #endif
322     rstart = mat->rstart;
323     nz     = aa->nz + bb->nz;
324     garray = mat->garray;
325     rstart = mat->rstart;
326 
327     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
328 #if defined(PETSC_USE_COMPLEX)
329       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
330 #else
331       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
332 #endif
333     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
334       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
335       Destroy_LU(N, &lu->grid, &lu->LUstruct);
336       lu->options.Fact = SamePattern;
337     }
338     nz = 0; jB = 0; irow = mat->rstart;
339     for ( i=0; i<m; i++ ) {
340       lu->row[i] = nz;
341       countA = ai[i+1] - ai[i];
342       countB = bi[i+1] - bi[i];
343       ajj = aj + ai[i];  /* ptr to the beginning of this row */
344       bjj = bj + bi[i];
345 
346       /* B part, smaller col index */
347       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
348       for (j=0; j<countB; j++){
349         jcol = garray[bjj[j]];
350         if (jcol > colA_start) {
351           jB = j;
352           break;
353         }
354         lu->col[nz] = jcol;
355         lu->val[nz++] = *bv++;
356         if (j==countB-1) jB = countB;
357       }
358 
359       /* A part */
360       for (j=0; j<countA; j++){
361         lu->col[nz] = mat->rstart + ajj[j];
362         lu->val[nz++] = *av++;
363       }
364 
365       /* B part, larger col index */
366       for (j=jB; j<countB; j++){
367         lu->col[nz] = garray[bjj[j]];
368         lu->val[nz++] = *bv++;
369       }
370     }
371     lu->row[m] = nz;
372 #if defined(PETSC_USE_COMPLEX)
373     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
374 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
375 #else
376     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
377 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
378 #endif
379   }
380   if (lu->StatPrint) {
381     ierr = PetscGetTime(&time[0]);CHKERRQ(ierr);
382     time0[0] = time[0] - time0[0];
383   }
384 
385   /* Factor the matrix. */
386   PStatInit(&stat);   /* Initialize the statistics variables. */
387 
388   if (lu->StatPrint) {
389     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
390     ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr);
391   }
392 
393   if (lu->MatInputMode == GLOBAL) { /* global mat input */
394 #if defined(PETSC_USE_COMPLEX)
395     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
396                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
397 #else
398     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
399                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
400 #endif
401   } else { /* distributed mat input */
402 #if defined(PETSC_USE_COMPLEX)
403     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
404 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
405     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
406 #else
407     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
408 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
409     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
410 #endif
411   }
412   if (lu->StatPrint) {
413     ierr = PetscGetTime(&time[1]);CHKERRQ(ierr);  /* to be removed */
414     time0[1] = time[1] - time0[1];
415     if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
416   }
417   PStatFree(&stat);
418 
419   if (lu->MatInputMode == GLOBAL && size > 1){
420     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
421   }
422 
423   if (lu->StatPrint) {
424     ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
425     ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
426     ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
427     for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
428     ierr = PetscPrintf(A->comm, "  Time for mat conversion (max/min/avg):    %g / %g / %g\n",time_max[0],time_min[0],time[0]);
429     ierr = PetscPrintf(A->comm, "  Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]);
430   }
431   (*F)->assembled = PETSC_TRUE;
432   lu->flg         = SAME_NONZERO_PATTERN;
433   PetscFunctionReturn(0);
434 }
435 
436 /* Note the Petsc r and c permutations are ignored */
437 #undef __FUNCT__
438 #define __FUNCT__ "MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST"
439 int MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
440 {
441   Mat                     B;
442   Mat_MPIAIJ_SuperLU_DIST *lu;
443   int                     ierr,M=A->M,N=A->N,size;
444   superlu_options_t       options;
445   char                    buff[32];
446   PetscTruth              flg;
447   char                    *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
448   char                    *prtype[] = {"LargeDiag","NATURAL"};
449   PetscFunctionBegin;
450 
451   /* Create the factorization matrix */
452   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
453   ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr);
454   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
455   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
456 
457   B->ops->solve            = MatSolve_MPIAIJ_SuperLU_DIST;
458   B->factor                = FACTOR_LU;
459 
460   lu = (Mat_MPIAIJ_SuperLU_DIST*)(B->spptr);
461 
462   /* Set the input options */
463   set_default_options(&options);
464   lu->MatInputMode = GLOBAL;
465   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
466 
467   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
468   lu->nprow = size/2;               /* Default process rows.      */
469   if (lu->nprow == 0) lu->nprow = 1;
470   lu->npcol = size/lu->nprow;           /* Default process columns.   */
471 
472   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
473 
474     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
475     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
476     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
477 
478     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
479     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
480 
481     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
482     if (!flg) {
483       options.Equil = NO;
484     }
485 
486     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr);
487     while (flg) {
488       ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr);
489       if (flg) {
490         options.RowPerm = LargeDiag;
491         break;
492       }
493       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
494       if (flg) {
495         options.RowPerm = NOROWPERM;
496         break;
497       }
498       SETERRQ1(1,"Unknown row permutation %s",buff);
499     }
500 
501     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr);
502     while (flg) {
503       ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr);
504       if (flg) {
505         options.ColPerm = MMD_AT_PLUS_A;
506         break;
507       }
508       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
509       if (flg) {
510         options.ColPerm = NATURAL;
511         break;
512       }
513       ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr);
514       if (flg) {
515         options.ColPerm = MMD_ATA;
516         break;
517       }
518       ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr);
519       if (flg) {
520         options.ColPerm = COLAMD;
521         break;
522       }
523       SETERRQ1(1,"Unknown column permutation %s",buff);
524     }
525 
526     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
527     if (!flg) {
528       options.ReplaceTinyPivot = NO;
529     }
530 
531     options.IterRefine = NOREFINE;
532     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
533     if (flg) {
534       options.IterRefine = DOUBLE;
535     }
536 
537     if (PetscLogPrintInfo) {
538       lu->StatPrint = (int)PETSC_TRUE;
539     } else {
540       lu->StatPrint = (int)PETSC_FALSE;
541     }
542     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
543                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
544   PetscOptionsEnd();
545 
546   /* Initialize the SuperLU process grid. */
547   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
548 
549   /* Initialize ScalePermstruct and LUstruct. */
550   ScalePermstructInit(M, N, &lu->ScalePermstruct);
551   LUstructInit(M, N, &lu->LUstruct);
552 
553   lu->options            = options;
554   lu->flg                = DIFFERENT_NONZERO_PATTERN;
555   lu->CleanUpSuperLUDist = PETSC_TRUE;
556   *F = B;
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatUseSuperLU_DIST_MPIAIJ"
562 int MatUseSuperLU_DIST_MPIAIJ(Mat A)
563 {
564   PetscFunctionBegin;
565   A->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST;
566   A->ops->lufactornumeric  = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatMPIAIJFactorInfo_SuperLu"
572 int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer)
573 {
574   Mat_MPIAIJ_SuperLU_DIST *lu= (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
575   superlu_options_t       options;
576   int                     ierr;
577   char                    *colperm;
578 
579   PetscFunctionBegin;
580   /* check if matrix is superlu_dist type */
581   if (A->ops->solve != MatSolve_MPIAIJ_SuperLU_DIST) PetscFunctionReturn(0);
582 
583   options = lu->options;
584   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
585   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
586   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
587   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
588   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
589   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
590   if (options.ColPerm == NATURAL) {
591     colperm = "NATURAL";
592   } else if (options.ColPerm == MMD_AT_PLUS_A) {
593     colperm = "MMD_AT_PLUS_A";
594   } else if (options.ColPerm == MMD_ATA) {
595     colperm = "MMD_ATA";
596   } else if (options.ColPerm == COLAMD) {
597     colperm = "COLAMD";
598   } else {
599     SETERRQ(1,"Unknown column permutation");
600   }
601   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
602   PetscFunctionReturn(0);
603 }
604 
605 EXTERN_C_BEGIN
606 #undef __FUNCT__
607 #define __FUNCT__ "MatCreate_MPIAIJ_SuperLU_DIST"
608 int MatCreate_MPIAIJ_SuperLU_DIST(Mat A) {
609   int                     ierr,size;
610   MPI_Comm                comm;
611   Mat_MPIAIJ_SuperLU_DIST *lu;
612 
613   PetscFunctionBegin;
614   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
615   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
616   if (size == 1) {
617     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
618   } else {
619     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
620   }
621   ierr = MatUseSuperLU_DIST_MPIAIJ(A);CHKERRQ(ierr);
622 
623   ierr                   = PetscNew(Mat_MPIAIJ_SuperLU_DIST,&lu);CHKERRQ(ierr);
624   lu->MatView            = A->ops->view;
625   lu->MatAssemblyEnd     = A->ops->assemblyend;
626   lu->MatDestroy         = A->ops->destroy;
627   lu->CleanUpSuperLUDist = PETSC_FALSE;
628   A->spptr               = (void*)lu;
629   A->ops->view           = MatView_MPIAIJ_SuperLU_DIST;
630   A->ops->assemblyend    = MatAssemblyEnd_MPIAIJ_SuperLU_DIST;
631   A->ops->destroy        = MatDestroy_MPIAIJ_SuperLU_DIST;
632   PetscFunctionReturn(0);
633 }
634 EXTERN_C_END
635 
636 EXTERN_C_BEGIN
637 #undef __FUNCT__
638 #define __FUNCT__ "MatLoad_MPIAIJ_SuperLU_DIST"
639 int MatLoad_MPIAIJ_SuperLU_DIST(PetscViewer viewer,MatType type,Mat *A) {
640   int      ierr,size,(*r)(PetscViewer,MatType,Mat*);
641   MPI_Comm comm;
642 
643   PetscFunctionBegin;
644   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
645   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
646   if (size == 1) {
647     ierr = PetscFListFind(comm,MatLoadList,MATSEQAIJ,(void(**)(void))&r);CHKERRQ(ierr);
648   } else {
649     ierr = PetscFListFind(comm,MatLoadList,MATMPIAIJ,(void(**)(void))&r);CHKERRQ(ierr);
650   }
651   ierr = (*r)(viewer,type,A);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 EXTERN_C_END
655