xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 5c9eb25f753630cfd361293e05e7358a00a954ac)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.2 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   fact_t                  FactPattern;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   /* Flag to clean up (non-global) SuperLU objects during Destroy */
42   PetscTruth CleanUpSuperLU_Dist;
43 } Mat_SuperLU_DIST;
44 
45 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
46 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
47 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
48 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
49 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
50 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
51 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
52 
53 #undef __FUNCT__
54 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
55 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
56 {
57   PetscErrorCode   ierr;
58   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
59 
60   PetscFunctionBegin;
61   if (lu->CleanUpSuperLU_Dist) {
62     /* Deallocate SuperLU_DIST storage */
63     if (lu->MatInputMode == GLOBAL) {
64       Destroy_CompCol_Matrix_dist(&lu->A_sup);
65     } else {
66       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
67       if ( lu->options.SolveInitialized ) {
68 #if defined(PETSC_USE_COMPLEX)
69         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
70 #else
71         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
72 #endif
73       }
74     }
75     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
76     ScalePermstructFree(&lu->ScalePermstruct);
77     LUstructFree(&lu->LUstruct);
78 
79     /* Release the SuperLU_DIST process grid. */
80     superlu_gridexit(&lu->grid);
81 
82     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
83   }
84 
85   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
86   PetscFunctionReturn(0);
87 }
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "MatSolve_SuperLU_DIST"
91 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
92 {
93   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
94   PetscErrorCode   ierr;
95   PetscMPIInt      size;
96   PetscInt         m=A->rmap.N, N=A->cmap.N;
97   SuperLUStat_t    stat;
98   double           berr[1];
99   PetscScalar      *bptr;
100   PetscInt         info, nrhs=1;
101   Vec              x_seq;
102   IS               iden;
103   VecScatter       scat;
104 
105   PetscFunctionBegin;
106   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
107   if (size > 1) {
108     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
109       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
110       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
111       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
112       ierr = ISDestroy(iden);CHKERRQ(ierr);
113 
114       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
115       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
117     } else { /* distributed mat input */
118       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
119       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
120     }
121   } else { /* size == 1 */
122     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
123     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
124   }
125 
126   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
127 
128   PStatInit(&stat);        /* Initialize the statistics variables. */
129   if (lu->MatInputMode == GLOBAL) {
130 #if defined(PETSC_USE_COMPLEX)
131     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
132                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
133 #else
134     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
135                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
136 #endif
137   } else { /* distributed mat input */
138 #if defined(PETSC_USE_COMPLEX)
139     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
140 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
141     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
142 #else
143     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
144 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
145     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
146 #endif
147   }
148   if (lu->options.PrintStat) {
149      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
150   }
151   PStatFree(&stat);
152 
153   if (size > 1) {
154     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
155       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
156       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
157       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
158       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
159       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
160     } else {
161       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
162     }
163   } else {
164     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
165   }
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
171 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
172 {
173   Mat              *tseq,A_seq = PETSC_NULL;
174   Mat_SeqAIJ       *aa,*bb;
175   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
176   PetscErrorCode   ierr;
177   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
178                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
179   PetscMPIInt      size,rank;
180   SuperLUStat_t    stat;
181   double           *berr=0;
182   IS               isrow;
183   PetscLogDouble   time0,time,time_min,time_max;
184   Mat              F_diag=PETSC_NULL;
185 #if defined(PETSC_USE_COMPLEX)
186   doublecomplex    *av, *bv;
187 #else
188   double           *av, *bv;
189 #endif
190 
191   PetscFunctionBegin;
192   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
193   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
194 
195   if (lu->options.PrintStat) { /* collect time for mat conversion */
196     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
197     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
198   }
199 
200   if (lu->MatInputMode == GLOBAL) { /* global mat input */
201     if (size > 1) { /* convert mpi A to seq mat A */
202       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
203       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
204       ierr = ISDestroy(isrow);CHKERRQ(ierr);
205 
206       A_seq = *tseq;
207       ierr = PetscFree(tseq);CHKERRQ(ierr);
208       aa =  (Mat_SeqAIJ*)A_seq->data;
209     } else {
210       aa =  (Mat_SeqAIJ*)A->data;
211     }
212 
213     /* Convert Petsc NR matrix to SuperLU_DIST NC.
214        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
215     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
216       if (lu->FactPattern == SamePattern_SameRowPerm){
217         Destroy_CompCol_Matrix_dist(&lu->A_sup);
218         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
219         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
220       } else {
221         Destroy_CompCol_Matrix_dist(&lu->A_sup);
222         Destroy_LU(N, &lu->grid, &lu->LUstruct);
223         lu->options.Fact = SamePattern;
224       }
225     }
226 #if defined(PETSC_USE_COMPLEX)
227     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
228 #else
229     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
230 #endif
231 
232     /* Create compressed column matrix A_sup. */
233 #if defined(PETSC_USE_COMPLEX)
234     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
235 #else
236     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
237 #endif
238   } else { /* distributed mat input */
239     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
240     aa=(Mat_SeqAIJ*)(mat->A)->data;
241     bb=(Mat_SeqAIJ*)(mat->B)->data;
242     ai=aa->i; aj=aa->j;
243     bi=bb->i; bj=bb->j;
244 #if defined(PETSC_USE_COMPLEX)
245     av=(doublecomplex*)aa->a;
246     bv=(doublecomplex*)bb->a;
247 #else
248     av=aa->a;
249     bv=bb->a;
250 #endif
251     rstart = A->rmap.rstart;
252     nz     = aa->nz + bb->nz;
253     garray = mat->garray;
254 
255     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
256 #if defined(PETSC_USE_COMPLEX)
257       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
258 #else
259       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
260 #endif
261     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
262       if (lu->FactPattern == SamePattern_SameRowPerm){
263         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
264         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
265       } else {
266         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
267         lu->options.Fact = SamePattern;
268       }
269     }
270     nz = 0; irow = rstart;
271     for ( i=0; i<m; i++ ) {
272       lu->row[i] = nz;
273       countA = ai[i+1] - ai[i];
274       countB = bi[i+1] - bi[i];
275       ajj = aj + ai[i];  /* ptr to the beginning of this row */
276       bjj = bj + bi[i];
277 
278       /* B part, smaller col index */
279       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
280       jB = 0;
281       for (j=0; j<countB; j++){
282         jcol = garray[bjj[j]];
283         if (jcol > colA_start) {
284           jB = j;
285           break;
286         }
287         lu->col[nz] = jcol;
288         lu->val[nz++] = *bv++;
289         if (j==countB-1) jB = countB;
290       }
291 
292       /* A part */
293       for (j=0; j<countA; j++){
294         lu->col[nz] = rstart + ajj[j];
295         lu->val[nz++] = *av++;
296       }
297 
298       /* B part, larger col index */
299       for (j=jB; j<countB; j++){
300         lu->col[nz] = garray[bjj[j]];
301         lu->val[nz++] = *bv++;
302       }
303     }
304     lu->row[m] = nz;
305 #if defined(PETSC_USE_COMPLEX)
306     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
307 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
308 #else
309     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
310 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
311 #endif
312   }
313   if (lu->options.PrintStat) {
314     ierr = PetscGetTime(&time);CHKERRQ(ierr);
315     time0 = time - time0;
316   }
317 
318   /* Factor the matrix. */
319   PStatInit(&stat);   /* Initialize the statistics variables. */
320 
321   if (lu->MatInputMode == GLOBAL) { /* global mat input */
322 #if defined(PETSC_USE_COMPLEX)
323     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
324                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
325 #else
326     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
327                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
328 #endif
329   } else { /* distributed mat input */
330 #if defined(PETSC_USE_COMPLEX)
331     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
332 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
333     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
334 #else
335     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
336 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
337     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
338 #endif
339   }
340 
341   if (lu->MatInputMode == GLOBAL && size > 1){
342     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
343   }
344 
345   if (lu->options.PrintStat) {
346     if (size > 1){
347       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
348       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
349       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
350       time = time/size; /* average time */
351       if (!rank) {
352         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
353       }
354     } else {
355       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
356     }
357     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
358   }
359   PStatFree(&stat);
360   if (size > 1){
361     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
362     F_diag->assembled = PETSC_TRUE;
363   }
364   (*F)->assembled    = PETSC_TRUE;
365   (*F)->preallocated = PETSC_TRUE;
366   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
367   PetscFunctionReturn(0);
368 }
369 
370 /* Note the Petsc r and c permutations are ignored */
371 #undef __FUNCT__
372 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
373 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
374 {
375   Mat               B;
376   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (*F)->spptr;
377   PetscErrorCode    ierr;
378   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
379 
380   PetscFunctionBegin;
381   /* Initialize the SuperLU process grid. */
382   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
383 
384   /* Initialize ScalePermstruct and LUstruct. */
385   ScalePermstructInit(M, N, &lu->ScalePermstruct);
386   LUstructInit(M, N, &lu->LUstruct);
387   PetscFunctionReturn(0);
388 }
389 
390 EXTERN_C_BEGIN
391 #undef __FUNCT__
392 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
393 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
394 {
395   Mat               B;
396   Mat_SuperLU_DIST  *lu;
397   PetscErrorCode    ierr;
398   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
399   PetscMPIInt       size;
400   superlu_options_t options;
401   PetscTruth        flg;
402   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
403   const char        *prtype[] = {"LargeDiag","NATURAL"};
404   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
405 
406   PetscFunctionBegin;
407   /* Create the factorization matrix */
408   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
409   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
410   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
411   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
412   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
413 
414   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
415   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
416   B->ops->solve            = MatSolve_SuperLU_DIST;
417   B->factor                = MAT_FACTOR_LU;
418 
419   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
420   B->spptr = lu;
421 
422   /* Set the default input options:
423      options.Fact              = DOFACT;
424      options.Equil             = YES;
425      options.ParSymbFact       = NO;
426      options.ColPerm           = MMD_AT_PLUS_A;
427      options.RowPerm           = LargeDiag;
428      options.ReplaceTinyPivot  = YES;
429      options.IterRefine        = DOUBLE;
430      options.Trans             = NOTRANS;
431      options.SolveInitialized  = NO;
432      options.RefineInitialized = NO;
433      options.PrintStat         = YES;
434   */
435   set_default_options_dist(&options);
436 
437   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
438   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
439   /* Default num of process columns and rows */
440   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
441   if (!lu->npcol) lu->npcol = 1;
442   while (lu->npcol > 0) {
443     lu->nprow = PetscMPIIntCast(size/lu->npcol);
444     if (size == lu->nprow * lu->npcol) break;
445     lu->npcol --;
446   }
447 
448   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
449     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
450     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
451     if (size != lu->nprow * lu->npcol)
452       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
453 
454     lu->MatInputMode = DISTRIBUTED;
455     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
456     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
457 
458     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
459     if (!flg) {
460       options.Equil = NO;
461     }
462 
463     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
464     if (flg) {
465       switch (indx) {
466       case 0:
467         options.RowPerm = LargeDiag;
468         break;
469       case 1:
470         options.RowPerm = NOROWPERM;
471         break;
472       }
473     }
474 
475     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
476     if (flg) {
477       switch (indx) {
478       case 0:
479         options.ColPerm = MMD_AT_PLUS_A;
480         break;
481       case 1:
482         options.ColPerm = NATURAL;
483         break;
484       case 2:
485         options.ColPerm = MMD_ATA;
486         break;
487       case 3:
488         options.ColPerm = PARMETIS;
489         break;
490       }
491     }
492 
493     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
494     if (!flg) {
495       options.ReplaceTinyPivot = NO;
496     }
497 
498     options.ParSymbFact = NO;
499     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
500     if (flg){
501 #ifdef PETSC_HAVE_PARMETIS
502       options.ParSymbFact = YES;
503       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
504 #else
505       printf("parsymbfact needs PARMETIS");
506 #endif
507     }
508 
509     lu->FactPattern = SamePattern;
510     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
511     if (flg) {
512       switch (indx) {
513       case 0:
514         lu->FactPattern = SamePattern;
515         break;
516       case 1:
517         lu->FactPattern = SamePattern_SameRowPerm;
518         break;
519       }
520     }
521 
522     options.IterRefine = NOREFINE;
523     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
524     if (flg) {
525       options.IterRefine = DOUBLE;
526     }
527 
528     if (PetscLogPrintInfo) {
529       options.PrintStat = YES;
530     } else {
531       options.PrintStat = NO;
532     }
533     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
534                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
535   PetscOptionsEnd();
536 
537   lu->options             = options;
538   lu->options.Fact        = DOFACT;
539   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
540   *F = B;
541   PetscFunctionReturn(0);
542 }
543 EXTERN_C_END
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
547 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
548 {
549   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
550   superlu_options_t options;
551   PetscErrorCode    ierr;
552 
553   PetscFunctionBegin;
554   /* check if matrix is superlu_dist type */
555   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
556 
557   options = lu->options;
558   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
559   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
560   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
561   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
562   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
563   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
564   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
565   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
566   if (options.ColPerm == NATURAL) {
567     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
568   } else if (options.ColPerm == MMD_AT_PLUS_A) {
569     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
570   } else if (options.ColPerm == MMD_ATA) {
571     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
572   } else if (options.ColPerm == PARMETIS) {
573     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
574   } else {
575     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
576   }
577 
578   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);
579 
580   if (lu->FactPattern == SamePattern){
581     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
582   } else {
583     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatView_SuperLU_DIST"
590 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
591 {
592   PetscErrorCode    ierr;
593   PetscTruth        iascii;
594   PetscViewerFormat format;
595   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
596 
597   PetscFunctionBegin;
598 
599   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
600   if (iascii) {
601     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
602     if (format == PETSC_VIEWER_ASCII_INFO) {
603       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
604     }
605   }
606   PetscFunctionReturn(0);
607 }
608 
609 
610 /*MC
611   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
612   via the external package SuperLU_DIST.
613 
614   If SuperLU_DIST is installed (see the manual for
615   instructions on how to declare the existence of external packages),
616   a matrix type can be constructed which invokes SuperLU_DIST solvers.
617   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
618   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
619   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
620 
621   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
622   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
623   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
624   for communicators controlling multiple processes.  It is recommended that you call both of
625   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
626   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
627   without data copy; but this MUST be called AFTER the matrix values are set.
628 
629 
630 
631   Options Database Keys:
632 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
633 . -mat_superlu_dist_r <n> - number of rows in processor partition
634 . -mat_superlu_dist_c <n> - number of columns in processor partition
635 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
636 . -mat_superlu_dist_equil - equilibrate the matrix
637 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
638 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
639 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
640 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
641 . -mat_superlu_dist_iterrefine - use iterative refinement
642 - -mat_superlu_dist_statprint - print factorization information
643 
644    Level: beginner
645 
646 .seealso: PCLU
647 M*/
648 
649