xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision fe9eba3dd04352b54df67a907d37ac6fd6e9766e)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.2 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 #if defined(PETSC_USE_64BIT_INDICES)
14 /* ugly SuperLU_Dist variable telling it to use long long int */
15 #define _LONGINT
16 #endif
17 
18 EXTERN_C_BEGIN
19 #if defined(PETSC_USE_COMPLEX)
20 #include "superlu_zdefs.h"
21 #else
22 #include "superlu_ddefs.h"
23 #endif
24 EXTERN_C_END
25 
26 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
27 const char *SuperLU_MatInputModes[]    = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
28 
29 typedef struct {
30   int_t                   nprow,npcol,*row,*col;
31   gridinfo_t              grid;
32   superlu_options_t       options;
33   SuperMatrix             A_sup;
34   ScalePermstruct_t       ScalePermstruct;
35   LUstruct_t              LUstruct;
36   int                     StatPrint;
37   SuperLU_MatInputMode    MatInputMode;
38   SOLVEstruct_t           SOLVEstruct;
39   fact_t                  FactPattern;
40   MPI_Comm                comm_superlu;
41 #if defined(PETSC_USE_COMPLEX)
42   doublecomplex           *val;
43 #else
44   double                  *val;
45 #endif
46 
47   /* Flag to clean up (non-global) SuperLU objects during Destroy */
48   PetscTruth CleanUpSuperLU_Dist;
49 } Mat_SuperLU_DIST;
50 
51 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
52 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *);
53 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
54 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
55 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
56 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *);
57 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
61 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
62 {
63   PetscErrorCode   ierr;
64   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
65 
66   PetscFunctionBegin;
67   if (lu->CleanUpSuperLU_Dist) {
68     /* Deallocate SuperLU_DIST storage */
69     if (lu->MatInputMode == GLOBAL) {
70       Destroy_CompCol_Matrix_dist(&lu->A_sup);
71     } else {
72       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
73       if ( lu->options.SolveInitialized ) {
74 #if defined(PETSC_USE_COMPLEX)
75         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
76 #else
77         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
78 #endif
79       }
80     }
81     Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
82     ScalePermstructFree(&lu->ScalePermstruct);
83     LUstructFree(&lu->LUstruct);
84 
85     /* Release the SuperLU_DIST process grid. */
86     superlu_gridexit(&lu->grid);
87 
88     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
89   }
90 
91   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatSolve_SuperLU_DIST"
97 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
98 {
99   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
100   PetscErrorCode   ierr;
101   PetscMPIInt      size;
102   PetscInt         m=A->rmap->N, N=A->cmap->N;
103   SuperLUStat_t    stat;
104   double           berr[1];
105   PetscScalar      *bptr;
106   PetscInt         nrhs=1;
107   Vec              x_seq;
108   IS               iden;
109   VecScatter       scat;
110   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
111 
112   PetscFunctionBegin;
113   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
114   if (size > 1) {
115     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
116       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
117       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
118       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
119       ierr = ISDestroy(iden);CHKERRQ(ierr);
120 
121       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
123       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
124     } else { /* distributed mat input */
125       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
126       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
127     }
128   } else { /* size == 1 */
129     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
130     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
131   }
132 
133   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
134 
135   PStatInit(&stat);        /* Initialize the statistics variables. */
136   if (lu->MatInputMode == GLOBAL) {
137 #if defined(PETSC_USE_COMPLEX)
138     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
139                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
140 #else
141     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
142                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
143 #endif
144   } else { /* distributed mat input */
145 #if defined(PETSC_USE_COMPLEX)
146     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
147 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
148     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
149 #else
150     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
151 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
152     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
153 #endif
154   }
155   if (lu->options.PrintStat) {
156      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
157   }
158   PStatFree(&stat);
159 
160   if (size > 1) {
161     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
162       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
163       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
164       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
165       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
166       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
167     } else {
168       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
169     }
170   } else {
171     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
178 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
179 {
180   Mat              *tseq,A_seq = PETSC_NULL;
181   Mat_SeqAIJ       *aa,*bb;
182   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
183   PetscErrorCode   ierr;
184   PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
185                    m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
186   int              sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
187   PetscMPIInt      size,rank;
188   SuperLUStat_t    stat;
189   double           *berr=0;
190   IS               isrow;
191   PetscLogDouble   time0,time,time_min,time_max;
192   Mat              F_diag=PETSC_NULL;
193 #if defined(PETSC_USE_COMPLEX)
194   doublecomplex    *av, *bv;
195 #else
196   double           *av, *bv;
197 #endif
198 
199   PetscFunctionBegin;
200   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
201   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
202 
203   if (lu->options.PrintStat) { /* collect time for mat conversion */
204     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
205     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
206   }
207 
208   if (lu->MatInputMode == GLOBAL) { /* global mat input */
209     if (size > 1) { /* convert mpi A to seq mat A */
210       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
211       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
212       ierr = ISDestroy(isrow);CHKERRQ(ierr);
213 
214       A_seq = *tseq;
215       ierr = PetscFree(tseq);CHKERRQ(ierr);
216       aa =  (Mat_SeqAIJ*)A_seq->data;
217     } else {
218       aa =  (Mat_SeqAIJ*)A->data;
219     }
220 
221     /* Convert Petsc NR matrix to SuperLU_DIST NC.
222        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
223     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
224       if (lu->FactPattern == SamePattern_SameRowPerm){
225         Destroy_CompCol_Matrix_dist(&lu->A_sup);
226         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
227         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
228       } else {
229         Destroy_CompCol_Matrix_dist(&lu->A_sup);
230         Destroy_LU(N, &lu->grid, &lu->LUstruct);
231         lu->options.Fact = SamePattern;
232       }
233     }
234 #if defined(PETSC_USE_COMPLEX)
235     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
236 #else
237     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
238 #endif
239 
240     /* Create compressed column matrix A_sup. */
241 #if defined(PETSC_USE_COMPLEX)
242     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
243 #else
244     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
245 #endif
246   } else { /* distributed mat input */
247     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
248     aa=(Mat_SeqAIJ*)(mat->A)->data;
249     bb=(Mat_SeqAIJ*)(mat->B)->data;
250     ai=aa->i; aj=aa->j;
251     bi=bb->i; bj=bb->j;
252 #if defined(PETSC_USE_COMPLEX)
253     av=(doublecomplex*)aa->a;
254     bv=(doublecomplex*)bb->a;
255 #else
256     av=aa->a;
257     bv=bb->a;
258 #endif
259     rstart = A->rmap->rstart;
260     nz     = aa->nz + bb->nz;
261     garray = mat->garray;
262 
263     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
264 #if defined(PETSC_USE_COMPLEX)
265       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
266 #else
267       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
268 #endif
269     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
270       if (lu->FactPattern == SamePattern_SameRowPerm){
271         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
272         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
273       } else {
274         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
275         lu->options.Fact = SamePattern;
276       }
277     }
278     nz = 0; irow = rstart;
279     for ( i=0; i<m; i++ ) {
280       lu->row[i] = nz;
281       countA = ai[i+1] - ai[i];
282       countB = bi[i+1] - bi[i];
283       ajj = aj + ai[i];  /* ptr to the beginning of this row */
284       bjj = bj + bi[i];
285 
286       /* B part, smaller col index */
287       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
288       jB = 0;
289       for (j=0; j<countB; j++){
290         jcol = garray[bjj[j]];
291         if (jcol > colA_start) {
292           jB = j;
293           break;
294         }
295         lu->col[nz] = jcol;
296         lu->val[nz++] = *bv++;
297         if (j==countB-1) jB = countB;
298       }
299 
300       /* A part */
301       for (j=0; j<countA; j++){
302         lu->col[nz] = rstart + ajj[j];
303         lu->val[nz++] = *av++;
304       }
305 
306       /* B part, larger col index */
307       for (j=jB; j<countB; j++){
308         lu->col[nz] = garray[bjj[j]];
309         lu->val[nz++] = *bv++;
310       }
311     }
312     lu->row[m] = nz;
313 #if defined(PETSC_USE_COMPLEX)
314     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
315 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
316 #else
317     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
318 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
319 #endif
320   }
321   if (lu->options.PrintStat) {
322     ierr = PetscGetTime(&time);CHKERRQ(ierr);
323     time0 = time - time0;
324   }
325 
326   /* Factor the matrix. */
327   PStatInit(&stat);   /* Initialize the statistics variables. */
328   CHKMEMQ;
329   if (lu->MatInputMode == GLOBAL) { /* global mat input */
330 #if defined(PETSC_USE_COMPLEX)
331     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
332                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
333 #else
334     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
335                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
336 #endif
337   } else { /* distributed mat input */
338 #if defined(PETSC_USE_COMPLEX)
339     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
340 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
341     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
342 #else
343     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
344 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
345     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
346 #endif
347   }
348 
349   if (lu->MatInputMode == GLOBAL && size > 1){
350     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
351   }
352 
353   if (lu->options.PrintStat) {
354     if (size > 1){
355       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
356       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
357       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
358       time = time/size; /* average time */
359       if (!rank) {
360         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
361       }
362     } else {
363       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
364     }
365     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
366   }
367   PStatFree(&stat);
368   if (size > 1){
369     F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
370     F_diag->assembled = PETSC_TRUE;
371   }
372   (F)->assembled    = PETSC_TRUE;
373   (F)->preallocated = PETSC_TRUE;
374   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
375   PetscFunctionReturn(0);
376 }
377 
378 /* Note the Petsc r and c permutations are ignored */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
381 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
382 {
383   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (F)->spptr;
384   PetscInt          M=A->rmap->N,N=A->cmap->N;
385 
386   PetscFunctionBegin;
387   /* Initialize the SuperLU process grid. */
388   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
389 
390   /* Initialize ScalePermstruct and LUstruct. */
391   ScalePermstructInit(M, N, &lu->ScalePermstruct);
392   LUstructInit(M, N, &lu->LUstruct);
393   (F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
394   (F)->ops->solve            = MatSolve_SuperLU_DIST;
395   PetscFunctionReturn(0);
396 }
397 
398 #undef __FUNCT__
399 #define __FUNCT__ "MatGetFactor_aij_superlu_dist"
400 PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
401 {
402   Mat               B;
403   Mat_SuperLU_DIST  *lu;
404   PetscErrorCode    ierr;
405   PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
406   PetscMPIInt       size;
407   superlu_options_t options;
408   PetscTruth        flg;
409   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
410   const char        *prtype[] = {"LargeDiag","NATURAL"};
411   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
412 
413   PetscFunctionBegin;
414   /* Create the factorization matrix */
415   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
416   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
417   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
418   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
419   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
420 
421   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
422   B->ops->view             = MatView_SuperLU_DIST;
423   B->factor                = MAT_FACTOR_LU;
424 
425   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
426   B->spptr = lu;
427 
428   /* Set the default input options:
429      options.Fact              = DOFACT;
430      options.Equil             = YES;
431      options.ParSymbFact       = NO;
432      options.ColPerm           = MMD_AT_PLUS_A;
433      options.RowPerm           = LargeDiag;
434      options.ReplaceTinyPivot  = YES;
435      options.IterRefine        = DOUBLE;
436      options.Trans             = NOTRANS;
437      options.SolveInitialized  = NO;
438      options.RefineInitialized = NO;
439      options.PrintStat         = YES;
440   */
441   set_default_options_dist(&options);
442 
443   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
444   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
445   /* Default num of process columns and rows */
446   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
447   if (!lu->npcol) lu->npcol = 1;
448   while (lu->npcol > 0) {
449     lu->nprow = PetscMPIIntCast(size/lu->npcol);
450     if (size == lu->nprow * lu->npcol) break;
451     lu->npcol --;
452   }
453 
454   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
455     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
456     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
457     if (size != lu->nprow * lu->npcol)
458       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
459 
460     lu->MatInputMode = DISTRIBUTED;
461     ierr = PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
462     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
463 
464     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
465     if (!flg) {
466       options.Equil = NO;
467     }
468 
469     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
470     if (flg) {
471       switch (indx) {
472       case 0:
473         options.RowPerm = LargeDiag;
474         break;
475       case 1:
476         options.RowPerm = NOROWPERM;
477         break;
478       }
479     }
480 
481     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
482     if (flg) {
483       switch (indx) {
484       case 0:
485         options.ColPerm = MMD_AT_PLUS_A;
486         break;
487       case 1:
488         options.ColPerm = NATURAL;
489         break;
490       case 2:
491         options.ColPerm = MMD_ATA;
492         break;
493       case 3:
494         options.ColPerm = PARMETIS;
495         break;
496       }
497     }
498 
499     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
500     if (!flg) {
501       options.ReplaceTinyPivot = NO;
502     }
503 
504     options.ParSymbFact = NO;
505     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
506     if (flg){
507 #ifdef PETSC_HAVE_PARMETIS
508       options.ParSymbFact = YES;
509       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
510 #else
511       printf("parsymbfact needs PARMETIS");
512 #endif
513     }
514 
515     lu->FactPattern = SamePattern;
516     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
517     if (flg) {
518       switch (indx) {
519       case 0:
520         lu->FactPattern = SamePattern;
521         break;
522       case 1:
523         lu->FactPattern = SamePattern_SameRowPerm;
524         break;
525       }
526     }
527 
528     options.IterRefine = NOREFINE;
529     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
530     if (flg) {
531       options.IterRefine = DOUBLE;
532     }
533 
534     if (PetscLogPrintInfo) {
535       options.PrintStat = YES;
536     } else {
537       options.PrintStat = NO;
538     }
539     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
540                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
541   PetscOptionsEnd();
542 
543   lu->options             = options;
544   lu->options.Fact        = DOFACT;
545   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
546   *F = B;
547   PetscFunctionReturn(0);
548 }
549 
550 EXTERN_C_BEGIN
551 #undef __FUNCT__
552 #define __FUNCT__ "MatGetFactor_seqaij_superlu_dist"
553 PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
554 {
555   PetscErrorCode ierr;
556 
557   PetscFunctionBegin;
558   ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
559   PetscFunctionReturn(0);
560 }
561 EXTERN_C_END
562 
563 EXTERN_C_BEGIN
564 #undef __FUNCT__
565 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
566 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
567 {
568   PetscErrorCode ierr;
569 
570   PetscFunctionBegin;
571   ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 EXTERN_C_END
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
578 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
579 {
580   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
581   superlu_options_t options;
582   PetscErrorCode    ierr;
583 
584   PetscFunctionBegin;
585   /* check if matrix is superlu_dist type */
586   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
587 
588   options = lu->options;
589   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
590   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
591   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
592   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
593   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
594   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
595   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
596   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
597   if (options.ColPerm == NATURAL) {
598     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
599   } else if (options.ColPerm == MMD_AT_PLUS_A) {
600     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
601   } else if (options.ColPerm == MMD_ATA) {
602     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
603   } else if (options.ColPerm == PARMETIS) {
604     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
605   } else {
606     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
607   }
608 
609   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);
610 
611   if (lu->FactPattern == SamePattern){
612     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
613   } else {
614     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 #undef __FUNCT__
620 #define __FUNCT__ "MatView_SuperLU_DIST"
621 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
622 {
623   PetscErrorCode    ierr;
624   PetscTruth        iascii;
625   PetscViewerFormat format;
626 
627   PetscFunctionBegin;
628   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
629   if (iascii) {
630     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
631     if (format == PETSC_VIEWER_ASCII_INFO) {
632       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
633     }
634   }
635   PetscFunctionReturn(0);
636 }
637 
638 
639 /*MC
640   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
641   via the external package SuperLU_DIST.
642 
643   If SuperLU_DIST is installed (see the manual for
644   instructions on how to declare the existence of external packages),
645   a matrix type can be constructed which invokes SuperLU_DIST solvers.
646   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
647   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
648   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
649 
650   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
651   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
652   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
653   for communicators controlling multiple processes.  It is recommended that you call both of
654   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
655   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
656   without data copy; but this MUST be called AFTER the matrix values are set.
657 
658 
659 
660   Options Database Keys:
661 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
662 . -mat_superlu_dist_r <n> - number of rows in processor partition
663 . -mat_superlu_dist_c <n> - number of columns in processor partition
664 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
665 . -mat_superlu_dist_equil - equilibrate the matrix
666 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
667 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
668 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
669 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
670 . -mat_superlu_dist_iterrefine - use iterative refinement
671 - -mat_superlu_dist_statprint - print factorization information
672 
673    Level: beginner
674 
675 .seealso: PCLU
676 M*/
677 
678