xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision b271bb04eaec0d477b629c8be871cf42cb4980f5)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.2 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
/* How the PETSc matrix is handed to SuperLU_DIST: either the entire matrix
   replicated on every process (GLOBAL) or each process passing only its
   local rows (DISTRIBUTED). */
typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;

/* Private data stored in the factor matrix's spptr slot */
typedef struct {
  int_t                   nprow,npcol,*row,*col;  /* process grid dimensions; row/col index arrays handed to SuperLU_DIST */
  gridinfo_t              grid;                   /* SuperLU_DIST 2D process grid */
  superlu_options_t       options;                /* SuperLU_DIST solver options (Fact doubles as factorization state) */
  SuperMatrix             A_sup;                  /* the matrix in SuperLU_DIST format (SLU_NC for GLOBAL, SLU_NR_loc for DISTRIBUTED) */
  ScalePermstruct_t       ScalePermstruct;        /* row/column scalings and permutations */
  LUstruct_t              LUstruct;               /* the distributed L and U factors */
  int                     StatPrint;              /* NOTE(review): appears unused in this file; printing is driven by options.PrintStat */
  int                     MatInputMode;           /* holds a SuperLU_MatInputMode value (GLOBAL or DISTRIBUTED) */
  SOLVEstruct_t           SOLVEstruct;            /* communication structures for repeated distributed triangular solves */
  fact_t                  FactPattern;            /* SamePattern or SamePattern_SameRowPerm, used on refactorization */
  MPI_Comm                comm_superlu;           /* duplicated communicator on which the SuperLU_DIST grid lives */
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;                   /* numerical values in SuperLU_DIST's complex type */
#else
  double                  *val;                   /* numerical values */
#endif

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;
44 
/* Forward declarations of the SuperLU_DIST factor-matrix implementation routines */
extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *);
extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *);
extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
52 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SuperLU_DIST"
/*
   Destroys a SuperLU_DIST factor matrix: frees the SuperLU_DIST matrix,
   factors, process grid and duplicated communicator (when this object owns
   them, i.e. CleanUpSuperLU_Dist is set), then falls through to the
   standard MPIAIJ destroy for the PETSc part of the object.
*/
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      /* GLOBAL mode stored A_sup in compressed-column (SLU_NC) format */
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      /* DISTRIBUTED mode stored A_sup in local compressed-row (SLU_NR_loc) format */
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      if ( lu->options.SolveInitialized ) {
        /* release the communication structures set up for repeated triangular solves */
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    /* free the communicator duplicated in MatGetFactor_mpiaij_superlu_dist() */
    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }

  ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
88 
89 #undef __FUNCT__
90 #define __FUNCT__ "MatSolve_SuperLU_DIST"
91 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
92 {
93   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
94   PetscErrorCode   ierr;
95   PetscMPIInt      size;
96   PetscInt         m=A->rmap->N, N=A->cmap->N;
97   SuperLUStat_t    stat;
98   double           berr[1];
99   PetscScalar      *bptr;
100   PetscInt         info, nrhs=1;
101   Vec              x_seq;
102   IS               iden;
103   VecScatter       scat;
104 
105   PetscFunctionBegin;
106   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
107   if (size > 1) {
108     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
109       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
110       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
111       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
112       ierr = ISDestroy(iden);CHKERRQ(ierr);
113 
114       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
115       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
116       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
117     } else { /* distributed mat input */
118       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
119       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
120     }
121   } else { /* size == 1 */
122     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
123     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
124   }
125 
126   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
127 
128   PStatInit(&stat);        /* Initialize the statistics variables. */
129   if (lu->MatInputMode == GLOBAL) {
130 #if defined(PETSC_USE_COMPLEX)
131     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
132                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
133 #else
134     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
135                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
136 #endif
137   } else { /* distributed mat input */
138 #if defined(PETSC_USE_COMPLEX)
139     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
140 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
141     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
142 #else
143     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
144 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
145     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
146 #endif
147   }
148   if (lu->options.PrintStat) {
149      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
150   }
151   PStatFree(&stat);
152 
153   if (size > 1) {
154     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
155       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
156       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
157       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
158       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
159       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
160     } else {
161       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
162     }
163   } else {
164     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
165   }
166   PetscFunctionReturn(0);
167 }
168 
169 #undef __FUNCT__
170 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
171 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
172 {
173   Mat              *tseq,A_seq = PETSC_NULL;
174   Mat_SeqAIJ       *aa,*bb;
175   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
176   PetscErrorCode   ierr;
177   PetscInt         M=A->rmap->N,N=A->cmap->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
178                    m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
179   PetscMPIInt      size,rank;
180   SuperLUStat_t    stat;
181   double           *berr=0;
182   IS               isrow;
183   PetscLogDouble   time0,time,time_min,time_max;
184   Mat              F_diag=PETSC_NULL;
185 #if defined(PETSC_USE_COMPLEX)
186   doublecomplex    *av, *bv;
187 #else
188   double           *av, *bv;
189 #endif
190 
191   PetscFunctionBegin;
192   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
193   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
194 
195   if (lu->options.PrintStat) { /* collect time for mat conversion */
196     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
197     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
198   }
199 
200   if (lu->MatInputMode == GLOBAL) { /* global mat input */
201     if (size > 1) { /* convert mpi A to seq mat A */
202       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
203       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
204       ierr = ISDestroy(isrow);CHKERRQ(ierr);
205 
206       A_seq = *tseq;
207       ierr = PetscFree(tseq);CHKERRQ(ierr);
208       aa =  (Mat_SeqAIJ*)A_seq->data;
209     } else {
210       aa =  (Mat_SeqAIJ*)A->data;
211     }
212 
213     /* Convert Petsc NR matrix to SuperLU_DIST NC.
214        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
215     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
216       if (lu->FactPattern == SamePattern_SameRowPerm){
217         Destroy_CompCol_Matrix_dist(&lu->A_sup);
218         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
219         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
220       } else {
221         Destroy_CompCol_Matrix_dist(&lu->A_sup);
222         Destroy_LU(N, &lu->grid, &lu->LUstruct);
223         lu->options.Fact = SamePattern;
224       }
225     }
226 #if defined(PETSC_USE_COMPLEX)
227     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
228 #else
229     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
230 #endif
231 
232     /* Create compressed column matrix A_sup. */
233 #if defined(PETSC_USE_COMPLEX)
234     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
235 #else
236     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
237 #endif
238   } else { /* distributed mat input */
239     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
240     aa=(Mat_SeqAIJ*)(mat->A)->data;
241     bb=(Mat_SeqAIJ*)(mat->B)->data;
242     ai=aa->i; aj=aa->j;
243     bi=bb->i; bj=bb->j;
244 #if defined(PETSC_USE_COMPLEX)
245     av=(doublecomplex*)aa->a;
246     bv=(doublecomplex*)bb->a;
247 #else
248     av=aa->a;
249     bv=bb->a;
250 #endif
251     rstart = A->rmap->rstart;
252     nz     = aa->nz + bb->nz;
253     garray = mat->garray;
254 
255     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
256 #if defined(PETSC_USE_COMPLEX)
257       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
258 #else
259       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
260 #endif
261     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
262       if (lu->FactPattern == SamePattern_SameRowPerm){
263         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
264         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
265       } else {
266         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
267         lu->options.Fact = SamePattern;
268       }
269     }
270     nz = 0; irow = rstart;
271     for ( i=0; i<m; i++ ) {
272       lu->row[i] = nz;
273       countA = ai[i+1] - ai[i];
274       countB = bi[i+1] - bi[i];
275       ajj = aj + ai[i];  /* ptr to the beginning of this row */
276       bjj = bj + bi[i];
277 
278       /* B part, smaller col index */
279       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
280       jB = 0;
281       for (j=0; j<countB; j++){
282         jcol = garray[bjj[j]];
283         if (jcol > colA_start) {
284           jB = j;
285           break;
286         }
287         lu->col[nz] = jcol;
288         lu->val[nz++] = *bv++;
289         if (j==countB-1) jB = countB;
290       }
291 
292       /* A part */
293       for (j=0; j<countA; j++){
294         lu->col[nz] = rstart + ajj[j];
295         lu->val[nz++] = *av++;
296       }
297 
298       /* B part, larger col index */
299       for (j=jB; j<countB; j++){
300         lu->col[nz] = garray[bjj[j]];
301         lu->val[nz++] = *bv++;
302       }
303     }
304     lu->row[m] = nz;
305 #if defined(PETSC_USE_COMPLEX)
306     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
307 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
308 #else
309     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
310 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
311 #endif
312   }
313   if (lu->options.PrintStat) {
314     ierr = PetscGetTime(&time);CHKERRQ(ierr);
315     time0 = time - time0;
316   }
317 
318   /* Factor the matrix. */
319   PStatInit(&stat);   /* Initialize the statistics variables. */
320 
321   if (lu->MatInputMode == GLOBAL) { /* global mat input */
322 #if defined(PETSC_USE_COMPLEX)
323     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
324                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
325 #else
326     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
327                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
328 #endif
329   } else { /* distributed mat input */
330 #if defined(PETSC_USE_COMPLEX)
331     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
332 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
333     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
334 #else
335     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
336 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
337     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
338 #endif
339   }
340 
341   if (lu->MatInputMode == GLOBAL && size > 1){
342     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
343   }
344 
345   if (lu->options.PrintStat) {
346     if (size > 1){
347       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
348       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
349       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
350       time = time/size; /* average time */
351       if (!rank) {
352         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
353       }
354     } else {
355       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
356     }
357     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
358   }
359   PStatFree(&stat);
360   if (size > 1){
361     F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
362     F_diag->assembled = PETSC_TRUE;
363   }
364   (F)->assembled    = PETSC_TRUE;
365   (F)->preallocated = PETSC_TRUE;
366   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
367   PetscFunctionReturn(0);
368 }
369 
370 /* Note the Petsc r and c permutations are ignored */
371 #undef __FUNCT__
372 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
373 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
374 {
375   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (F)->spptr;
376   PetscInt          M=A->rmap->N,N=A->cmap->N;
377 
378   PetscFunctionBegin;
379   /* Initialize the SuperLU process grid. */
380   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
381 
382   /* Initialize ScalePermstruct and LUstruct. */
383   ScalePermstructInit(M, N, &lu->ScalePermstruct);
384   LUstructInit(M, N, &lu->LUstruct);
385   (F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
386   (F)->ops->solve            = MatSolve_SuperLU_DIST;
387   PetscFunctionReturn(0);
388 }
389 
390 EXTERN_C_BEGIN
391 #undef __FUNCT__
392 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
393 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
394 {
395   Mat               B;
396   Mat_SuperLU_DIST  *lu;
397   PetscErrorCode    ierr;
398   PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
399   PetscMPIInt       size;
400   superlu_options_t options;
401   PetscTruth        flg;
402   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
403   const char        *prtype[] = {"LargeDiag","NATURAL"};
404   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
405 
406   PetscFunctionBegin;
407   /* Create the factorization matrix */
408   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
409   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
410   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
411   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
412   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
413 
414   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
415   B->ops->view             = MatView_SuperLU_DIST;
416   B->factor                = MAT_FACTOR_LU;
417 
418   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
419   B->spptr = lu;
420 
421   /* Set the default input options:
422      options.Fact              = DOFACT;
423      options.Equil             = YES;
424      options.ParSymbFact       = NO;
425      options.ColPerm           = MMD_AT_PLUS_A;
426      options.RowPerm           = LargeDiag;
427      options.ReplaceTinyPivot  = YES;
428      options.IterRefine        = DOUBLE;
429      options.Trans             = NOTRANS;
430      options.SolveInitialized  = NO;
431      options.RefineInitialized = NO;
432      options.PrintStat         = YES;
433   */
434   set_default_options_dist(&options);
435 
436   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
437   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
438   /* Default num of process columns and rows */
439   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
440   if (!lu->npcol) lu->npcol = 1;
441   while (lu->npcol > 0) {
442     lu->nprow = PetscMPIIntCast(size/lu->npcol);
443     if (size == lu->nprow * lu->npcol) break;
444     lu->npcol --;
445   }
446 
447   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
448     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
449     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
450     if (size != lu->nprow * lu->npcol)
451       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
452 
453     lu->MatInputMode = DISTRIBUTED;
454     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
455     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
456 
457     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
458     if (!flg) {
459       options.Equil = NO;
460     }
461 
462     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
463     if (flg) {
464       switch (indx) {
465       case 0:
466         options.RowPerm = LargeDiag;
467         break;
468       case 1:
469         options.RowPerm = NOROWPERM;
470         break;
471       }
472     }
473 
474     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
475     if (flg) {
476       switch (indx) {
477       case 0:
478         options.ColPerm = MMD_AT_PLUS_A;
479         break;
480       case 1:
481         options.ColPerm = NATURAL;
482         break;
483       case 2:
484         options.ColPerm = MMD_ATA;
485         break;
486       case 3:
487         options.ColPerm = PARMETIS;
488         break;
489       }
490     }
491 
492     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
493     if (!flg) {
494       options.ReplaceTinyPivot = NO;
495     }
496 
497     options.ParSymbFact = NO;
498     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
499     if (flg){
500 #ifdef PETSC_HAVE_PARMETIS
501       options.ParSymbFact = YES;
502       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
503 #else
504       printf("parsymbfact needs PARMETIS");
505 #endif
506     }
507 
508     lu->FactPattern = SamePattern;
509     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
510     if (flg) {
511       switch (indx) {
512       case 0:
513         lu->FactPattern = SamePattern;
514         break;
515       case 1:
516         lu->FactPattern = SamePattern_SameRowPerm;
517         break;
518       }
519     }
520 
521     options.IterRefine = NOREFINE;
522     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
523     if (flg) {
524       options.IterRefine = DOUBLE;
525     }
526 
527     if (PetscLogPrintInfo) {
528       options.PrintStat = YES;
529     } else {
530       options.PrintStat = NO;
531     }
532     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
533                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
534   PetscOptionsEnd();
535 
536   lu->options             = options;
537   lu->options.Fact        = DOFACT;
538   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
539   *F = B;
540   PetscFunctionReturn(0);
541 }
542 EXTERN_C_END
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
546 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
547 {
548   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
549   superlu_options_t options;
550   PetscErrorCode    ierr;
551 
552   PetscFunctionBegin;
553   /* check if matrix is superlu_dist type */
554   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
555 
556   options = lu->options;
557   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
558   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
559   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
560   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
561   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
562   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
563   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
564   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
565   if (options.ColPerm == NATURAL) {
566     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
567   } else if (options.ColPerm == MMD_AT_PLUS_A) {
568     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
569   } else if (options.ColPerm == MMD_ATA) {
570     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
571   } else if (options.ColPerm == PARMETIS) {
572     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
573   } else {
574     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
575   }
576 
577   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);
578 
579   if (lu->FactPattern == SamePattern){
580     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
581   } else {
582     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
583   }
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatView_SuperLU_DIST"
589 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
590 {
591   PetscErrorCode    ierr;
592   PetscTruth        iascii;
593   PetscViewerFormat format;
594 
595   PetscFunctionBegin;
596   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
597   if (iascii) {
598     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
599     if (format == PETSC_VIEWER_ASCII_INFO) {
600       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
601     }
602   }
603   PetscFunctionReturn(0);
604 }
605 
606 
607 /*MC
608   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
609   via the external package SuperLU_DIST.
610 
611   If SuperLU_DIST is installed (see the manual for
612   instructions on how to declare the existence of external packages),
613   a matrix type can be constructed which invokes SuperLU_DIST solvers.
614   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
615   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
616   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
617 
618   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
619   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
620   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
621   for communicators controlling multiple processes.  It is recommended that you call both of
622   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
623   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
624   without data copy; but this MUST be called AFTER the matrix values are set.
625 
626 
627 
628   Options Database Keys:
629 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
630 . -mat_superlu_dist_r <n> - number of rows in processor partition
631 . -mat_superlu_dist_c <n> - number of columns in processor partition
632 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
633 . -mat_superlu_dist_equil - equilibrate the matrix
634 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
. -mat_superlu_dist_colperm <MMD_AT_PLUS_A,NATURAL,MMD_ATA,PARMETIS> - column permutation
636 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
637 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
638 . -mat_superlu_dist_iterrefine - use iterative refinement
639 - -mat_superlu_dist_statprint - print factorization information
640 
641    Level: beginner
642 
643 .seealso: PCLU
644 M*/
645 
646