#define PETSCMAT_DLL

/*
        Provides an interface to the SuperLU_DIST_2.2 sparse solver
*/

#include "../src/mat/impls/aij/seq/aij.h"
#include "../src/mat/impls/aij/mpi/mpiaij.h"
#if defined(PETSC_HAVE_STDLIB_H) /* Works around a header problem with SuperLU on Cray */
#include "stdlib.h"
#endif

#if defined(PETSC_USE_64BIT_INDICES)
/* SuperLU_DIST requires the _LONGINT macro when PETSc uses 64-bit (long long int) indices */
#define _LONGINT
#endif

EXTERN_C_BEGIN
#if defined(PETSC_USE_COMPLEX)
#include "superlu_zdefs.h"
#else
#include "superlu_ddefs.h"
#endif
EXTERN_C_END

typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
const char *SuperLU_MatInputModes[]    = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
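/* GLOBAL gathers the entire matrix onto every process and uses the p?gssvx_ABglobal() drivers;
   DISTRIBUTED hands SuperLU_DIST the local rows directly (SLU_NR_loc format) and uses p?gssvx(). */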

typedef struct {
  int_t                   nprow,npcol,*row,*col;
  gridinfo_t              grid;
  superlu_options_t       options;
  SuperMatrix             A_sup;
  ScalePermstruct_t       ScalePermstruct;
  LUstruct_t              LUstruct;
  int                     StatPrint;
  SuperLU_MatInputMode    MatInputMode;
  SOLVEstruct_t           SOLVEstruct;
  fact_t                  FactPattern;
  MPI_Comm                comm_superlu;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;
#else
  double                  *val;
#endif

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;

extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *);
extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *);
extern PetscErrorCode MatDestroy_MPIAIJ(Mat);

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SuperLU_DIST"
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscTruth       flg;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      if (lu->options.SolveInitialized) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }

  ierr = PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
  } else {
    ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSolve_SuperLU_DIST"
PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  PetscInt         m=A->rmap->N, N=A->cmap->N;
  SuperLUStat_t    stat;
  double           berr[1];
  PetscScalar      *bptr;
  PetscInt         nrhs=1;
  Vec              x_seq;
  IS               iden;
  VecScatter       scat;
  int              info; /* The SuperLU_DIST info code is always an int, even with long long indices */

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size > 1) {
    if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
      ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
      ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
      ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
      ierr = ISDestroy(iden);CHKERRQ(ierr);

      ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
    } else { /* distributed mat input */
      ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
      ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
    }
  } else { /* size == 1 */
    ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
    ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
  }

  if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED");

  PStatInit(&stat);        /* Initialize the statistics variables. */
  if (lu->MatInputMode == GLOBAL) {
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
                   &lu->grid, &lu->LUstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx_ABglobal fails, info: %d\n",info);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
                   &lu->grid, &lu->LUstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx_ABglobal fails, info: %d\n",info);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
    if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
#endif
  }
  if (lu->options.PrintStat) {
     PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
  }
  PStatFree(&stat);

  if (size > 1) {
    if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
      ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
      ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
      ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
      ierr = VecDestroy(x_seq);CHKERRQ(ierr);
    } else {
      ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
    }
  } else {
    ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  int              sinfo; /* The SuperLU_DIST info code is always an int, even with long long indices */
  PetscMPIInt      size,rank;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  PetscLogDouble   time0,time,time_min,time_max;
  Mat              F_diag=PETSC_NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  if (lu->options.PrintStat) { /* collect time for mat conversion */
    ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
    ierr = PetscGetTime(&time0);CHKERRQ(ierr);
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* GLOBAL input requires the entire matrix on every process: gather A into a sequential copy */
      ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
      ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
      ierr = ISDestroy(isrow);CHKERRQ(ierr);

      A_seq = *tseq;
      ierr = PetscFree(tseq);CHKERRQ(ierr);
      aa =  (Mat_SeqAIJ*)A_seq->data;
    } else {
      PetscTruth flg;
      ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
      if (flg) {
        Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
        A = At->A;
      }
      aa =  (Mat_SeqAIJ*)A->data;
    }

    /* Convert the PETSc compressed-row (NR) matrix to SuperLU_DIST compressed-column (NC) format.
       Note: the memory for lu->val, lu->col and lu->row is allocated by CompRow_to_CompCol_dist(). */
    if (lu->options.Fact != DOFACT) { /* successive numeric factorization, sparsity pattern is reused */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        /* Calling Destroy_LU(N, &lu->grid, &lu->LUstruct) here crashes; omitting it does not appear to leak memory. */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        Destroy_LU(N, &lu->grid, &lu->LUstruct);
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
#else
    dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
#else
    dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa=(Mat_SeqAIJ*)(mat->A)->data;
    bb=(Mat_SeqAIJ*)(mat->B)->data;
    ai=aa->i; aj=aa->j;
    bi=bb->i; bj=bb->j;
#if defined(PETSC_USE_COMPLEX)
    av=(doublecomplex*)aa->a;
    bv=(doublecomplex*)bb->a;
#else
    av=aa->a;
    bv=bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) { /* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#else
      dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        /* Calling Destroy_LU(N, &lu->grid, &lu->LUstruct) here crashes; omitting it does not appear to leak memory. */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }
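    /* Merge the diagonal block (mat->A, columns owned by this process) and the off-diagonal block
       (mat->B, columns owned by other processes) into a single local compressed-row structure whose
       global column indices appear in ascending order within each row. */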
    nz = 0; irow = rstart;
    for (i=0; i<m; i++) {
      lu->row[i] = nz;
      countA = ai[i+1] - ai[i];
      countB = bi[i+1] - bi[i];
      ajj = aj + ai[i];  /* ptr to the beginning of this row */
      bjj = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB = 0;
      for (j=0; j<countB; j++){
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz] = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++){
        lu->col[nz] = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++){
        lu->col[nz] = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
#else
    dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
#endif
  }
  if (lu->options.PrintStat) {
    ierr = PetscGetTime(&time);CHKERRQ(ierr);
    time0 = time - time0;
  }

  /* Factor the matrix. */
  PStatInit(&stat);   /* Initialize the statistics variables. */
  CHKMEMQ;
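  /* A NULL right-hand side and nrhs = 0 make the p?gssvx drivers perform the factorization only. */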
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                   &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx_ABglobal fails, info: %d\n",sinfo);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                   &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx_ABglobal fails, info: %d\n",sinfo);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1){
    ierr = MatDestroy(A_seq);CHKERRQ(ierr);
  }

  if (lu->options.PrintStat) {
    if (size > 1){
      ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);CHKERRQ(ierr);
      ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);CHKERRQ(ierr);
      ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);CHKERRQ(ierr);
      time = time/size; /* average time */
      if (!rank) {
        ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
    }
    PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1){
    F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this function only. */
  PetscFunctionReturn(0);
}

/* Note: the PETSc r and c permutations are ignored; SuperLU_DIST computes its own orderings,
   and the actual symbolic analysis is performed together with the numeric factorization. */
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (F)->spptr;
  PetscInt          M=A->rmap->N,N=A->cmap->N;

  PetscFunctionBegin;
  /* Initialize the SuperLU process grid. */
  superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);

  /* Initialize ScalePermstruct and LUstruct. */
  ScalePermstructInit(M, N, &lu->ScalePermstruct);
  LUstructInit(M, N, &lu->LUstruct);
  (F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
  (F)->ops->solve            = MatSolve_SuperLU_DIST;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatFactorGetSolverPackage_aij_superlu_dist"
PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
{
  PetscFunctionBegin;
  *type = MAT_SOLVER_SUPERLU_DIST;
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_aij_superlu_dist"
PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  Mat               B;
  Mat_SuperLU_DIST  *lu;
  PetscErrorCode    ierr;
  PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
  PetscMPIInt       size;
  superlu_options_t options;
  PetscTruth        flg;
  const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
  const char        *prtype[] = {"LargeDiag","NATURAL"};
  const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};

  PetscFunctionBegin;
  /* Create the factorization matrix */
  ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
  ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
  ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
  ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);

  B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  B->ops->view             = MatView_SuperLU_DIST;
  B->ops->destroy          = MatDestroy_SuperLU_DIST;
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);CHKERRQ(ierr);
  B->factor                = MAT_FACTOR_LU;

  ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
  B->spptr = lu;

  /* Set the default input options:
     options.Fact              = DOFACT;
     options.Equil             = YES;
     options.ParSymbFact       = NO;
     options.ColPerm           = MMD_AT_PLUS_A;
     options.RowPerm           = LargeDiag;
     options.ReplaceTinyPivot  = YES;
     options.IterRefine        = DOUBLE;
     options.Trans             = NOTRANS;
     options.SolveInitialized  = NO;
     options.RefineInitialized = NO;
     options.PrintStat         = YES;
  */
  set_default_options_dist(&options);

  ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  /* Default process grid: start npcol at the integer nearest sqrt(size), then decrease it
     until nprow*npcol equals size exactly */
  lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
  if (!lu->npcol) lu->npcol = 1;
  while (lu->npcol > 0) {
    lu->nprow = PetscMPIIntCast(size/lu->npcol);
    if (size == lu->nprow * lu->npcol) break;
    lu->npcol --;
  }

  ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
    if (size != lu->nprow * lu->npcol)
      SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal nprow %d * npcol %d",size,lu->nprow,lu->npcol);

    lu->MatInputMode = DISTRIBUTED;
    ierr = PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
    if (lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;

    ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
    if (!flg) {
      options.Equil = NO;
    }

    ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
    if (flg) {
      switch (indx) {
      case 0:
        options.RowPerm = LargeDiag;
        break;
      case 1:
        options.RowPerm = NOROWPERM;
        break;
      }
    }

    ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
    if (flg) {
      switch (indx) {
      case 0:
        options.ColPerm = MMD_AT_PLUS_A;
        break;
      case 1:
        options.ColPerm = NATURAL;
        break;
      case 2:
        options.ColPerm = MMD_ATA;
        break;
      case 3:
        options.ColPerm = PARMETIS;
        break;
      }
    }

    ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
    if (!flg) {
      options.ReplaceTinyPivot = NO;
    }

    options.ParSymbFact = NO;
    ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
    if (flg){
#ifdef PETSC_HAVE_PARMETIS
      options.ParSymbFact = YES;
      options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
#else
      ierr = PetscPrintf(PETSC_COMM_SELF,"parsymbfact needs ParMETIS\n");CHKERRQ(ierr);
#endif
    }

    lu->FactPattern = SamePattern;
    ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
    if (flg) {
      switch (indx) {
      case 0:
        lu->FactPattern = SamePattern;
        break;
      case 1:
        lu->FactPattern = SamePattern_SameRowPerm;
        break;
      }
    }

    options.IterRefine = NOREFINE;
    ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
    if (flg) {
      options.IterRefine = DOUBLE;
    }

    if (PetscLogPrintInfo) {
      options.PrintStat = YES;
    } else {
      options.PrintStat = NO;
    }
    ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
                              (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
  PetscOptionsEnd();

  lu->options             = options;
  lu->options.Fact        = DOFACT;
  lu->CleanUpSuperLU_Dist = PETSC_TRUE;
  *F = B;
  PetscFunctionReturn(0);
}

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_seqaij_superlu_dist"
PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

#undef __FUNCT__
#define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
  superlu_options_t options;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  /* check if matrix is superlu_dist type */
  if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);

  options = lu->options;
  ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
  ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL" : "LargeDiag");CHKERRQ(ierr);
  if (options.ColPerm == NATURAL) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
  } else if (options.ColPerm == MMD_AT_PLUS_A) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
  } else if (options.ColPerm == MMD_ATA) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
  } else if (options.ColPerm == PARMETIS) {
    ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
  }

  ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);

  if (lu->FactPattern == SamePattern){
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
  } else {
    ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_SuperLU_DIST"
PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
{
  PetscErrorCode    ierr;
  PetscTruth        iascii;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
    }
  }
  PetscFunctionReturn(0);
}


/*MC
  MAT_SOLVER_SUPERLU_DIST - Parallel direct solver package for LU factorization

   Works with AIJ matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_matinput <GLOBAL,DISTRIBUTED> - matrix input mode (GLOBAL gathers the entire matrix on every process; DISTRIBUTED passes only the local rows)
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
. -mat_superlu_dist_colperm <MMD_AT_PLUS_A,NATURAL,MMD_ATA,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_parsymbfact - use parallel symbolic factorization (requires ParMETIS)
. -mat_superlu_dist_fact <SamePattern,SamePattern_SameRowPerm> - sparsity pattern reuse for repeated factorization
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_statprint - print factorization information

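   Notes: A typical way to select this package at runtime (a sketch; the option names follow the
   usual PETSc conventions for this release) is
$     -pc_type lu -pc_factor_mat_solver_package superlu_dist
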
   Level: beginner

.seealso: PCLU, PCFactorSetMatSolverPackage(), MatSolverPackage

M*/
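
/*
   A minimal usage sketch (for illustration only; assumes a parallel AIJ matrix A and vectors b,x
   created elsewhere, and selects this package through the PC interface mentioned above):

     KSP ksp;
     PC  pc;
     ierr = KSPCreate(((PetscObject)A)->comm,&ksp);CHKERRQ(ierr);
     ierr = KSPSetOperators(ksp,A,A,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
     ierr = KSPGetPC(ksp,&pc);CHKERRQ(ierr);
     ierr = PCSetType(pc,PCLU);CHKERRQ(ierr);
     ierr = PCFactorSetMatSolverPackage(pc,MAT_SOLVER_SUPERLU_DIST);CHKERRQ(ierr);
     ierr = KSPSetFromOptions(ksp);CHKERRQ(ierr);
     ierr = KSPSolve(ksp,b,x);CHKERRQ(ierr);
     ierr = KSPDestroy(ksp);CHKERRQ(ierr);
*/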