xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision f3c0ef26f111f00eaf8fcbc644792dfa599d0a42)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.2 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
/* How the PETSc matrix is handed to SuperLU_DIST:
   GLOBAL      - the whole matrix is gathered onto every process and passed via the
                 p[dz]gssvx_ABglobal() drivers (see MatLUFactorNumeric_SuperLU_DIST)
   DISTRIBUTED - each process passes only its local rows via p[dz]gssvx() */
typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;

/* Per-factorization context stored in Mat->spptr for SuperLU_DIST factored matrices */
typedef struct {
  int_t                   nprow,npcol,*row,*col;  /* process-grid dimensions; row/col index arrays of A_sup (allocated by SuperLU_DIST routines) */
  gridinfo_t              grid;                   /* SuperLU_DIST 2D process grid, created in MatLUFactorSymbolic */
  superlu_options_t       options;                /* solver options; options.Fact also tracks factorization state */
  SuperMatrix             A_sup;                  /* the matrix in SuperLU_DIST format (SLU_NC or SLU_NR_loc) */
  ScalePermstruct_t       ScalePermstruct;        /* row/column scalings and permutations */
  LUstruct_t              LUstruct;               /* the distributed L and U factors */
  int                     StatPrint;
  int                     MatInputMode;           /* a SuperLU_MatInputMode value */
  SOLVEstruct_t           SOLVEstruct;            /* communication structure for the distributed triangular solves */
  fact_t                  FactPattern;            /* SamePattern or SamePattern_SameRowPerm for repeated factorizations */
  MPI_Comm                comm_superlu;           /* duplicate of the matrix communicator, owned by this struct */
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;                   /* numerical values of A_sup (SuperLU_DIST complex type) */
#else
  double                  *val;
#endif

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;
44 
45 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
46 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *);
47 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
48 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
49 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
50 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *);
51 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
52 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SuperLU_DIST"
/*
   Frees all SuperLU_DIST storage attached to the factored matrix, then falls
   through to the generic MPIAIJ destroy.  Safe to call on a matrix that was
   never factored: CleanUpSuperLU_Dist is only set once the solver objects exist.
*/
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      /* A_sup was built as a compressed-column (SLU_NC) matrix */
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      /* A_sup was built as a distributed compressed-row (SLU_NR_loc) matrix */
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      /* The SOLVEstruct only exists if a solve was performed in distributed mode */
      if ( lu->options.SolveInitialized ) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    /* Release the factors, then the permutation/scaling data, then the LU metadata */
    Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    /* comm_superlu was duplicated in MatGetFactor_aij_superlu_dist */
    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "MatSolve_SuperLU_DIST"
91 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
92 {
93   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
94   PetscErrorCode   ierr;
95   PetscMPIInt      size;
96   PetscInt         m=A->rmap->N, N=A->cmap->N;
97   SuperLUStat_t    stat;
98   double           berr[1];
99   PetscScalar      *bptr;
100   PetscInt         info, nrhs=1;
101   Vec              x_seq;
102   IS               iden;
103   VecScatter       scat;
104 
105   PetscFunctionBegin;
106   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
107   if (size > 1) {
108     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
109       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
110       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
111       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
112       ierr = ISDestroy(iden);CHKERRQ(ierr);
113 
114       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
115       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
117     } else { /* distributed mat input */
118       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
119       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
120     }
121   } else { /* size == 1 */
122     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
123     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
124   }
125 
126   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
127 
128   PStatInit(&stat);        /* Initialize the statistics variables. */
129   if (lu->MatInputMode == GLOBAL) {
130 #if defined(PETSC_USE_COMPLEX)
131     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
132                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
133 #else
134     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
135                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
136 #endif
137   } else { /* distributed mat input */
138 #if defined(PETSC_USE_COMPLEX)
139     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
140 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
141     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
142 #else
143     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
144 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
145     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
146 #endif
147   }
148   if (lu->options.PrintStat) {
149      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
150   }
151   PStatFree(&stat);
152 
153   if (size > 1) {
154     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
155       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
156       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
157       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
158       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
159       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
160     } else {
161       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
162     }
163   } else {
164     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
165   }
166   PetscFunctionReturn(0);
167 }
168 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
/*
   MatLUFactorNumeric_SuperLU_DIST - converts the PETSc AIJ matrix A into
   SuperLU_DIST format and performs the numeric LU factorization into F.

   GLOBAL mode: the full matrix is gathered on every process and converted to
   compressed-column (SLU_NC) form.  DISTRIBUTED mode: each process merges the
   local diagonal (A) and off-diagonal (B) blocks of the MPIAIJ matrix into one
   local compressed-row (SLU_NR_loc) matrix with global column indices.

   On repeated factorizations the sparsity pattern (and optionally the row
   permutation, per lu->FactPattern) is reused via options.Fact.
*/
PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
{
  Mat              *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap->N,N=A->cmap->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  PetscMPIInt      size,rank;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  PetscLogDouble   time0,time,time_min,time_max;
  Mat              F_diag=PETSC_NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);

  if (lu->options.PrintStat) { /* collect time for mat conversion */
    ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
    ierr = PetscGetTime(&time0);CHKERRQ(ierr);
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      /* gather all rows of A onto each process as a sequential matrix */
      ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
      ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
      ierr = ISDestroy(isrow);CHKERRQ(ierr);

      A_seq = *tseq;
      ierr = PetscFree(tseq);CHKERRQ(ierr);
      aa =  (Mat_SeqAIJ*)A_seq->data;
    } else {
      aa =  (Mat_SeqAIJ*)A->data;
    }

    /* Convert Petsc NR matrix to SuperLU_DIST NC.
       Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        /* free the old NC matrix; the LU factors themselves are reused */
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        Destroy_LU(N, &lu->grid, &lu->LUstruct);
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
#else
    dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
#else
    dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    /* aa: local diagonal block (local column numbering);
       bb: local off-diagonal block (compressed columns, mapped to global via garray) */
    aa=(Mat_SeqAIJ*)(mat->A)->data;
    bb=(Mat_SeqAIJ*)(mat->B)->data;
    ai=aa->i; aj=aa->j;
    bi=bb->i; bj=bb->j;
#if defined(PETSC_USE_COMPLEX)
    av=(doublecomplex*)aa->a;
    bv=(doublecomplex*)bb->a;
#else
    av=aa->a;
    bv=bb->a;
#endif
    rstart = A->rmap->rstart;
    nz     = aa->nz + bb->nz;  /* total local nonzeros over both blocks */
    garray = mat->garray;

    if (lu->options.Fact == DOFACT) {/* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#else
      dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }
    /* Merge the A (diagonal) and B (off-diagonal) blocks of each local row into
       lu->val/col/row in ascending global-column order: B entries below the first
       A column, then all A entries, then the remaining B entries. */
    nz = 0; irow = rstart;
    for ( i=0; i<m; i++ ) {
      lu->row[i] = nz;
      countA = ai[i+1] - ai[i];
      countB = bi[i+1] - bi[i];
      ajj = aj + ai[i];  /* ptr to the beginning of this row */
      bjj = bj + bi[i];

      /* B part, smaller col index */
      /* NOTE(review): ajj[0] assumes every local row has at least one entry in the
         diagonal block; a row with countA==0 would read past the row — confirm */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB = 0;
      for (j=0; j<countB; j++){
        jcol = garray[bjj[j]];      /* map compressed B column to global column */
        if (jcol > colA_start) {
          jB = j;                   /* first B entry to the right of the A block */
          break;
        }
        lu->col[nz] = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;  /* all B entries lie left of the A block */
      }

      /* A part */
      for (j=0; j<countA; j++){
        lu->col[nz] = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++){
        lu->col[nz] = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
#else
    dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
#endif
  }
  if (lu->options.PrintStat) {
    ierr = PetscGetTime(&time);CHKERRQ(ierr);
    time0 = time - time0;   /* elapsed conversion time on this process */
  }

  /* Factor the matrix. */
  PStatInit(&stat);   /* Initialize the statistics variables. */

  /* Passing a null rhs pointer and nrhs=0 requests factorization only (no solve) */
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                   &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                   &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  /* the gathered sequential copy is no longer needed once factored */
  if (lu->MatInputMode == GLOBAL && size > 1){
    ierr = MatDestroy(A_seq);CHKERRQ(ierr);
  }

  if (lu->options.PrintStat) {
    if (size > 1){
      /* report max/min/avg conversion time across processes on rank 0 */
      ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
      ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
      ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
      time = time/size; /* average time */
      if (!rank) {
        ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
      }
    } else {
      ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
    }
    PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1){
    /* mark the diagonal block of the parallel factor as assembled */
    F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (F)->assembled    = PETSC_TRUE;
  (F)->preallocated = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(0);
}
369 
370 /* Note the Petsc r and c permutations are ignored */
371 #undef __FUNCT__
372 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
373 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
374 {
375   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (F)->spptr;
376   PetscInt          M=A->rmap->N,N=A->cmap->N;
377 
378   PetscFunctionBegin;
379   /* Initialize the SuperLU process grid. */
380   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
381 
382   /* Initialize ScalePermstruct and LUstruct. */
383   ScalePermstructInit(M, N, &lu->ScalePermstruct);
384   LUstructInit(M, N, &lu->LUstruct);
385   (F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
386   (F)->ops->solve            = MatSolve_SuperLU_DIST;
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatGetFactor_aij_superlu_dist"
392 PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
393 {
394   Mat               B;
395   Mat_SuperLU_DIST  *lu;
396   PetscErrorCode    ierr;
397   PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
398   PetscMPIInt       size;
399   superlu_options_t options;
400   PetscTruth        flg;
401   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
402   const char        *prtype[] = {"LargeDiag","NATURAL"};
403   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
404 
405   PetscFunctionBegin;
406   /* Create the factorization matrix */
407   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
408   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
409   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
410   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
411   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
412 
413   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
414   B->ops->view             = MatView_SuperLU_DIST;
415   B->factor                = MAT_FACTOR_LU;
416 
417   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
418   B->spptr = lu;
419 
420   /* Set the default input options:
421      options.Fact              = DOFACT;
422      options.Equil             = YES;
423      options.ParSymbFact       = NO;
424      options.ColPerm           = MMD_AT_PLUS_A;
425      options.RowPerm           = LargeDiag;
426      options.ReplaceTinyPivot  = YES;
427      options.IterRefine        = DOUBLE;
428      options.Trans             = NOTRANS;
429      options.SolveInitialized  = NO;
430      options.RefineInitialized = NO;
431      options.PrintStat         = YES;
432   */
433   set_default_options_dist(&options);
434 
435   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
436   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
437   /* Default num of process columns and rows */
438   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
439   if (!lu->npcol) lu->npcol = 1;
440   while (lu->npcol > 0) {
441     lu->nprow = PetscMPIIntCast(size/lu->npcol);
442     if (size == lu->nprow * lu->npcol) break;
443     lu->npcol --;
444   }
445 
446   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
447     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
448     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
449     if (size != lu->nprow * lu->npcol)
450       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
451 
452     lu->MatInputMode = DISTRIBUTED;
453     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
454     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
455 
456     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
457     if (!flg) {
458       options.Equil = NO;
459     }
460 
461     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
462     if (flg) {
463       switch (indx) {
464       case 0:
465         options.RowPerm = LargeDiag;
466         break;
467       case 1:
468         options.RowPerm = NOROWPERM;
469         break;
470       }
471     }
472 
473     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
474     if (flg) {
475       switch (indx) {
476       case 0:
477         options.ColPerm = MMD_AT_PLUS_A;
478         break;
479       case 1:
480         options.ColPerm = NATURAL;
481         break;
482       case 2:
483         options.ColPerm = MMD_ATA;
484         break;
485       case 3:
486         options.ColPerm = PARMETIS;
487         break;
488       }
489     }
490 
491     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
492     if (!flg) {
493       options.ReplaceTinyPivot = NO;
494     }
495 
496     options.ParSymbFact = NO;
497     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
498     if (flg){
499 #ifdef PETSC_HAVE_PARMETIS
500       options.ParSymbFact = YES;
501       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
502 #else
503       printf("parsymbfact needs PARMETIS");
504 #endif
505     }
506 
507     lu->FactPattern = SamePattern;
508     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
509     if (flg) {
510       switch (indx) {
511       case 0:
512         lu->FactPattern = SamePattern;
513         break;
514       case 1:
515         lu->FactPattern = SamePattern_SameRowPerm;
516         break;
517       }
518     }
519 
520     options.IterRefine = NOREFINE;
521     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
522     if (flg) {
523       options.IterRefine = DOUBLE;
524     }
525 
526     if (PetscLogPrintInfo) {
527       options.PrintStat = YES;
528     } else {
529       options.PrintStat = NO;
530     }
531     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
532                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
533   PetscOptionsEnd();
534 
535   lu->options             = options;
536   lu->options.Fact        = DOFACT;
537   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
538   *F = B;
539   PetscFunctionReturn(0);
540 }
541 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_seqaij_superlu_dist"
/* SeqAIJ registration entry point: forwards to the common AIJ factory above. */
PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
554 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
/* MPIAIJ registration entry point: forwards to the common AIJ factory above. */
PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
567 
568 #undef __FUNCT__
569 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
570 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
571 {
572   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
573   superlu_options_t options;
574   PetscErrorCode    ierr;
575 
576   PetscFunctionBegin;
577   /* check if matrix is superlu_dist type */
578   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
579 
580   options = lu->options;
581   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
582   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
583   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
584   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
585   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
586   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
587   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
588   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
589   if (options.ColPerm == NATURAL) {
590     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
591   } else if (options.ColPerm == MMD_AT_PLUS_A) {
592     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
593   } else if (options.ColPerm == MMD_ATA) {
594     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
595   } else if (options.ColPerm == PARMETIS) {
596     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
597   } else {
598     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
599   }
600 
601   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);
602 
603   if (lu->FactPattern == SamePattern){
604     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
605   } else {
606     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
607   }
608   PetscFunctionReturn(0);
609 }
610 
611 #undef __FUNCT__
612 #define __FUNCT__ "MatView_SuperLU_DIST"
613 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
614 {
615   PetscErrorCode    ierr;
616   PetscTruth        iascii;
617   PetscViewerFormat format;
618 
619   PetscFunctionBegin;
620   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
621   if (iascii) {
622     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623     if (format == PETSC_VIEWER_ASCII_INFO) {
624       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
625     }
626   }
627   PetscFunctionReturn(0);
628 }
629 
630 
631 /*MC
632   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
633   via the external package SuperLU_DIST.
634 
635   If SuperLU_DIST is installed (see the manual for
636   instructions on how to declare the existence of external packages),
637   a matrix type can be constructed which invokes SuperLU_DIST solvers.
638   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
639   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
640   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
641 
642   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
643   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
644   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
645   for communicators controlling multiple processes.  It is recommended that you call both of
646   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
647   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
648   without data copy; but this MUST be called AFTER the matrix values are set.
649 
650 
651 
652   Options Database Keys:
653 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
654 . -mat_superlu_dist_r <n> - number of rows in processor partition
655 . -mat_superlu_dist_c <n> - number of columns in processor partition
656 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
657 . -mat_superlu_dist_equil - equilibrate the matrix
658 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
. -mat_superlu_dist_colperm <MMD_AT_PLUS_A,NATURAL,MMD_ATA,PARMETIS> - column permutation
660 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
661 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
662 . -mat_superlu_dist_iterrefine - use iterative refinement
663 - -mat_superlu_dist_statprint - print factorization information
664 
665    Level: beginner
666 
667 .seealso: PCLU
668 M*/
669 
670