xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision b24902e06ab141841ce6546a773244be2474cd18)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.2 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   fact_t                  FactPattern;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   /* Flag to clean up (non-global) SuperLU objects during Destroy */
42   PetscTruth CleanUpSuperLU_Dist;
43 } Mat_SuperLU_DIST;
44 
45 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
46 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
47 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
48 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
49 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
50 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
51 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
55 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
56 {
57   PetscErrorCode   ierr;
58   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
59 
60   PetscFunctionBegin;
61   if (lu->CleanUpSuperLU_Dist) {
62     /* Deallocate SuperLU_DIST storage */
63     if (lu->MatInputMode == GLOBAL) {
64       Destroy_CompCol_Matrix_dist(&lu->A_sup);
65     } else {
66       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
67       if ( lu->options.SolveInitialized ) {
68 #if defined(PETSC_USE_COMPLEX)
69         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
70 #else
71         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
72 #endif
73       }
74     }
75     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
76     ScalePermstructFree(&lu->ScalePermstruct);
77     LUstructFree(&lu->LUstruct);
78 
79     /* Release the SuperLU_DIST process grid. */
80     superlu_gridexit(&lu->grid);
81 
82     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
83   }
84 
85   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "MatSolve_SuperLU_DIST"
91 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
92 {
93   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
94   PetscErrorCode   ierr;
95   PetscMPIInt      size;
96   PetscInt         m=A->rmap.N, N=A->cmap.N;
97   SuperLUStat_t    stat;
98   double           berr[1];
99   PetscScalar      *bptr;
100   PetscInt         info, nrhs=1;
101   Vec              x_seq;
102   IS               iden;
103   VecScatter       scat;
104 
105   PetscFunctionBegin;
106   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
107   if (size > 1) {
108     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
109       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
110       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
111       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
112       ierr = ISDestroy(iden);CHKERRQ(ierr);
113 
114       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
115       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
117     } else { /* distributed mat input */
118       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
119       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
120     }
121   } else { /* size == 1 */
122     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
123     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
124   }
125 
126   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
127 
128   PStatInit(&stat);        /* Initialize the statistics variables. */
129   if (lu->MatInputMode == GLOBAL) {
130 #if defined(PETSC_USE_COMPLEX)
131     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
132                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
133 #else
134     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
135                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
136 #endif
137   } else { /* distributed mat input */
138 #if defined(PETSC_USE_COMPLEX)
139     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
140 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
141     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
142 #else
143     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
144 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
145     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
146 #endif
147   }
148   if (lu->options.PrintStat) {
149      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
150   }
151   PStatFree(&stat);
152 
153   if (size > 1) {
154     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
155       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
156       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
157       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
158       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
159       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
160     } else {
161       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
162     }
163   } else {
164     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
165   }
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
171 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
172 {
173   Mat              *tseq,A_seq = PETSC_NULL;
174   Mat_SeqAIJ       *aa,*bb;
175   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
176   PetscErrorCode   ierr;
177   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
178                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
179   PetscMPIInt      size,rank;
180   SuperLUStat_t    stat;
181   double           *berr=0;
182   IS               isrow;
183   PetscLogDouble   time0,time,time_min,time_max;
184   Mat              F_diag=PETSC_NULL;
185 #if defined(PETSC_USE_COMPLEX)
186   doublecomplex    *av, *bv;
187 #else
188   double           *av, *bv;
189 #endif
190 
191   PetscFunctionBegin;
192   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
193   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
194 
195   if (lu->options.PrintStat) { /* collect time for mat conversion */
196     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
197     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
198   }
199 
200   if (lu->MatInputMode == GLOBAL) { /* global mat input */
201     if (size > 1) { /* convert mpi A to seq mat A */
202       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
203       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
204       ierr = ISDestroy(isrow);CHKERRQ(ierr);
205 
206       A_seq = *tseq;
207       ierr = PetscFree(tseq);CHKERRQ(ierr);
208       aa =  (Mat_SeqAIJ*)A_seq->data;
209     } else {
210       aa =  (Mat_SeqAIJ*)A->data;
211     }
212 
213     /* Convert Petsc NR matrix to SuperLU_DIST NC.
214        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
215     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
216       if (lu->FactPattern == SamePattern_SameRowPerm){
217         Destroy_CompCol_Matrix_dist(&lu->A_sup);
218         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
219         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
220       } else {
221         Destroy_CompCol_Matrix_dist(&lu->A_sup);
222         Destroy_LU(N, &lu->grid, &lu->LUstruct);
223         lu->options.Fact = SamePattern;
224       }
225     }
226 #if defined(PETSC_USE_COMPLEX)
227     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
228 #else
229     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
230 #endif
231 
232     /* Create compressed column matrix A_sup. */
233 #if defined(PETSC_USE_COMPLEX)
234     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
235 #else
236     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
237 #endif
238   } else { /* distributed mat input */
239     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
240     aa=(Mat_SeqAIJ*)(mat->A)->data;
241     bb=(Mat_SeqAIJ*)(mat->B)->data;
242     ai=aa->i; aj=aa->j;
243     bi=bb->i; bj=bb->j;
244 #if defined(PETSC_USE_COMPLEX)
245     av=(doublecomplex*)aa->a;
246     bv=(doublecomplex*)bb->a;
247 #else
248     av=aa->a;
249     bv=bb->a;
250 #endif
251     rstart = A->rmap.rstart;
252     nz     = aa->nz + bb->nz;
253     garray = mat->garray;
254 
255     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
256 #if defined(PETSC_USE_COMPLEX)
257       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
258 #else
259       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
260 #endif
261     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
262       if (lu->FactPattern == SamePattern_SameRowPerm){
263         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
264         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
265       } else {
266         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
267         lu->options.Fact = SamePattern;
268       }
269     }
270     nz = 0; irow = rstart;
271     for ( i=0; i<m; i++ ) {
272       lu->row[i] = nz;
273       countA = ai[i+1] - ai[i];
274       countB = bi[i+1] - bi[i];
275       ajj = aj + ai[i];  /* ptr to the beginning of this row */
276       bjj = bj + bi[i];
277 
278       /* B part, smaller col index */
279       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
280       jB = 0;
281       for (j=0; j<countB; j++){
282         jcol = garray[bjj[j]];
283         if (jcol > colA_start) {
284           jB = j;
285           break;
286         }
287         lu->col[nz] = jcol;
288         lu->val[nz++] = *bv++;
289         if (j==countB-1) jB = countB;
290       }
291 
292       /* A part */
293       for (j=0; j<countA; j++){
294         lu->col[nz] = rstart + ajj[j];
295         lu->val[nz++] = *av++;
296       }
297 
298       /* B part, larger col index */
299       for (j=jB; j<countB; j++){
300         lu->col[nz] = garray[bjj[j]];
301         lu->val[nz++] = *bv++;
302       }
303     }
304     lu->row[m] = nz;
305 #if defined(PETSC_USE_COMPLEX)
306     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
307 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
308 #else
309     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
310 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
311 #endif
312   }
313   if (lu->options.PrintStat) {
314     ierr = PetscGetTime(&time);CHKERRQ(ierr);
315     time0 = time - time0;
316   }
317 
318   /* Factor the matrix. */
319   PStatInit(&stat);   /* Initialize the statistics variables. */
320 
321   if (lu->MatInputMode == GLOBAL) { /* global mat input */
322 #if defined(PETSC_USE_COMPLEX)
323     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
324                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
325 #else
326     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
327                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
328 #endif
329   } else { /* distributed mat input */
330 #if defined(PETSC_USE_COMPLEX)
331     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
332 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
333     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
334 #else
335     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
336 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
337     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
338 #endif
339   }
340 
341   if (lu->MatInputMode == GLOBAL && size > 1){
342     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
343   }
344 
345   if (lu->options.PrintStat) {
346     if (size > 1){
347       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
348       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
349       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
350       time = time/size; /* average time */
351       if (!rank) {
352         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
353       }
354     } else {
355       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
356     }
357     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
358   }
359   PStatFree(&stat);
360   if (size > 1){
361     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
362     F_diag->assembled = PETSC_TRUE;
363   }
364   (*F)->assembled  = PETSC_TRUE;
365   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
366   PetscFunctionReturn(0);
367 }
368 
369 /* Note the Petsc r and c permutations are ignored */
370 #undef __FUNCT__
371 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
372 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
373 {
374   Mat               B;
375   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (*F)->spptr;
376   PetscErrorCode    ierr;
377   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
378 
379   PetscFunctionBegin;
380   /* Initialize the SuperLU process grid. */
381   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
382 
383   /* Initialize ScalePermstruct and LUstruct. */
384   ScalePermstructInit(M, N, &lu->ScalePermstruct);
385   LUstructInit(M, N, &lu->LUstruct);
386   PetscFunctionReturn(0);
387 }
388 
389 #undef __FUNCT__
390 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
391 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
392 {
393   Mat               B;
394   Mat_SuperLU_DIST  *lu;
395   PetscErrorCode    ierr;
396   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
397   PetscMPIInt       size;
398   superlu_options_t options;
399   PetscTruth        flg;
400   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
401   const char        *prtype[] = {"LargeDiag","NATURAL"};
402   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
403 
404   PetscFunctionBegin;
405   /* Create the factorization matrix */
406   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
407   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
408   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
409   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
410   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
411 
412   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
413   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
414   B->ops->solve            = MatSolve_SuperLU_DIST;
415   B->factor                = FACTOR_LU;
416 
417   lu = (Mat_SuperLU_DIST*)(B->spptr);
418 
419   /* Set the default input options:
420      options.Fact              = DOFACT;
421      options.Equil             = YES;
422      options.ParSymbFact       = NO;
423      options.ColPerm           = MMD_AT_PLUS_A;
424      options.RowPerm           = LargeDiag;
425      options.ReplaceTinyPivot  = YES;
426      options.IterRefine        = DOUBLE;
427      options.Trans             = NOTRANS;
428      options.SolveInitialized  = NO;
429      options.RefineInitialized = NO;
430      options.PrintStat         = YES;
431   */
432   set_default_options_dist(&options);
433 
434   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
435   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
436   /* Default num of process columns and rows */
437   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
438   if (!lu->npcol) lu->npcol = 1;
439   while (lu->npcol > 0) {
440     lu->nprow = PetscMPIIntCast(size/lu->npcol);
441     if (size == lu->nprow * lu->npcol) break;
442     lu->npcol --;
443   }
444 
445   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
446     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
447     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
448     if (size != lu->nprow * lu->npcol)
449       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
450 
451     lu->MatInputMode = DISTRIBUTED;
452     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
453     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
454 
455     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
456     if (!flg) {
457       options.Equil = NO;
458     }
459 
460     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
461     if (flg) {
462       switch (indx) {
463       case 0:
464         options.RowPerm = LargeDiag;
465         break;
466       case 1:
467         options.RowPerm = NOROWPERM;
468         break;
469       }
470     }
471 
472     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
473     if (flg) {
474       switch (indx) {
475       case 0:
476         options.ColPerm = MMD_AT_PLUS_A;
477         break;
478       case 1:
479         options.ColPerm = NATURAL;
480         break;
481       case 2:
482         options.ColPerm = MMD_ATA;
483         break;
484       case 3:
485         options.ColPerm = PARMETIS;
486         break;
487       }
488     }
489 
490     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
491     if (!flg) {
492       options.ReplaceTinyPivot = NO;
493     }
494 
495     options.ParSymbFact = NO;
496     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
497     if (flg){
498 #ifdef PETSC_HAVE_PARMETIS
499       options.ParSymbFact = YES;
500       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
501 #else
502       printf("parsymbfact needs PARMETIS");
503 #endif
504     }
505 
506     lu->FactPattern = SamePattern;
507     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
508     if (flg) {
509       switch (indx) {
510       case 0:
511         lu->FactPattern = SamePattern;
512         break;
513       case 1:
514         lu->FactPattern = SamePattern_SameRowPerm;
515         break;
516       }
517     }
518 
519     options.IterRefine = NOREFINE;
520     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
521     if (flg) {
522       options.IterRefine = DOUBLE;
523     }
524 
525     if (PetscLogPrintInfo) {
526       options.PrintStat = YES;
527     } else {
528       options.PrintStat = NO;
529     }
530     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
531                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
532   PetscOptionsEnd();
533 
534   lu->options             = options;
535   lu->options.Fact        = DOFACT;
536   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
537   *F = B;
538   PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
543 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
544 {
545   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
546   superlu_options_t options;
547   PetscErrorCode    ierr;
548 
549   PetscFunctionBegin;
550   /* check if matrix is superlu_dist type */
551   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
552 
553   options = lu->options;
554   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
555   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
556   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
557   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
558   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
559   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
560   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
561   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
562   if (options.ColPerm == NATURAL) {
563     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
564   } else if (options.ColPerm == MMD_AT_PLUS_A) {
565     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
566   } else if (options.ColPerm == MMD_ATA) {
567     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
568   } else if (options.ColPerm == PARMETIS) {
569     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
570   } else {
571     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
572   }
573 
574   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);
575 
576   if (lu->FactPattern == SamePattern){
577     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
578   } else {
579     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
580   }
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatView_SuperLU_DIST"
586 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
587 {
588   PetscErrorCode    ierr;
589   PetscTruth        iascii;
590   PetscViewerFormat format;
591   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
592 
593   PetscFunctionBegin;
594 
595   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
596   if (iascii) {
597     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
598     if (format == PETSC_VIEWER_ASCII_INFO) {
599       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
600     }
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 
606 /*MC
607   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
608   via the external package SuperLU_DIST.
609 
610   If SuperLU_DIST is installed (see the manual for
611   instructions on how to declare the existence of external packages),
612   a matrix type can be constructed which invokes SuperLU_DIST solvers.
613   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
614   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
615   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
616 
617   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
618   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
619   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
620   for communicators controlling multiple processes.  It is recommended that you call both of
621   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
622   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
623   without data copy; but this MUST be called AFTER the matrix values are set.
624 
625 
626 
627   Options Database Keys:
628 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
629 . -mat_superlu_dist_r <n> - number of rows in processor partition
630 . -mat_superlu_dist_c <n> - number of columns in processor partition
631 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
632 . -mat_superlu_dist_equil - equilibrate the matrix
633 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
634 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
635 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
636 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
637 . -mat_superlu_dist_iterrefine - use iterative refinement
638 - -mat_superlu_dist_statprint - print factorization information
639 
640    Level: beginner
641 
642 .seealso: PCLU
643 M*/
644 
645