xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision fe28780beaa103e97151c0f4dcaa7b8cd2fce974)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.2 sparse solver
5 */
6 
7 #include "../src/mat/impls/aij/seq/aij.h"
8 #include "../src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 #if defined(PETSC_USE_64BIT_INDICES)
14 /* ugly SuperLU_Dist variable telling it to use long long int */
15 #define _LONGINT
16 #endif
17 
18 EXTERN_C_BEGIN
19 #if defined(PETSC_USE_COMPLEX)
20 #include "superlu_zdefs.h"
21 #else
22 #include "superlu_ddefs.h"
23 #endif
24 EXTERN_C_END
25 
26 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
27 const char *SuperLU_MatInputModes[]    = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
28 
29 typedef struct {
30   int_t                   nprow,npcol,*row,*col;
31   gridinfo_t              grid;
32   superlu_options_t       options;
33   SuperMatrix             A_sup;
34   ScalePermstruct_t       ScalePermstruct;
35   LUstruct_t              LUstruct;
36   int                     StatPrint;
37   SuperLU_MatInputMode    MatInputMode;
38   SOLVEstruct_t           SOLVEstruct;
39   fact_t                  FactPattern;
40   MPI_Comm                comm_superlu;
41 #if defined(PETSC_USE_COMPLEX)
42   doublecomplex           *val;
43 #else
44   double                  *val;
45 #endif
46 
47   /* Flag to clean up (non-global) SuperLU objects during Destroy */
48   PetscTruth CleanUpSuperLU_Dist;
49 } Mat_SuperLU_DIST;
50 
51 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
52 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *);
53 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
54 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
55 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
56 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *);
57 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
58 
59 #undef __FUNCT__
60 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
61 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
62 {
63   PetscErrorCode   ierr;
64   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
65 
66   PetscFunctionBegin;
67   if (lu->CleanUpSuperLU_Dist) {
68     /* Deallocate SuperLU_DIST storage */
69     if (lu->MatInputMode == GLOBAL) {
70       Destroy_CompCol_Matrix_dist(&lu->A_sup);
71     } else {
72       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
73       if ( lu->options.SolveInitialized ) {
74 #if defined(PETSC_USE_COMPLEX)
75         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
76 #else
77         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
78 #endif
79       }
80     }
81     Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
82     ScalePermstructFree(&lu->ScalePermstruct);
83     LUstructFree(&lu->LUstruct);
84 
85     /* Release the SuperLU_DIST process grid. */
86     superlu_gridexit(&lu->grid);
87 
88     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
89   }
90 
91   ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
92   PetscFunctionReturn(0);
93 }
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatSolve_SuperLU_DIST"
97 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
98 {
99   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
100   PetscErrorCode   ierr;
101   PetscMPIInt      size;
102   PetscInt         m=A->rmap->N, N=A->cmap->N;
103   SuperLUStat_t    stat;
104   double           berr[1];
105   PetscScalar      *bptr;
106   PetscInt         nrhs=1;
107   Vec              x_seq;
108   IS               iden;
109   VecScatter       scat;
110   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
111 
112   PetscFunctionBegin;
113   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
114   if (size > 1) {
115     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
116       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
117       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
118       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
119       ierr = ISDestroy(iden);CHKERRQ(ierr);
120 
121       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
122       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
123       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
124     } else { /* distributed mat input */
125       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
126       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
127     }
128   } else { /* size == 1 */
129     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
130     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
131   }
132 
133   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
134 
135   PStatInit(&stat);        /* Initialize the statistics variables. */
136   if (lu->MatInputMode == GLOBAL) {
137 #if defined(PETSC_USE_COMPLEX)
138     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
139                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
140 #else
141     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
142                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
143 #endif
144   } else { /* distributed mat input */
145 #if defined(PETSC_USE_COMPLEX)
146     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap->N, nrhs, &lu->grid,
147 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
148     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
149 #else
150     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap->N, nrhs, &lu->grid,
151 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
152     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
153 #endif
154   }
155   if (lu->options.PrintStat) {
156      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
157   }
158   PStatFree(&stat);
159 
160   if (size > 1) {
161     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
162       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
163       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
164       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
165       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
166       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
167     } else {
168       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
169     }
170   } else {
171     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
172   }
173   PetscFunctionReturn(0);
174 }
175 
176 #undef __FUNCT__
177 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
178 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
179 {
180   Mat              *tseq,A_seq = PETSC_NULL;
181   Mat_SeqAIJ       *aa,*bb;
182   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
183   PetscErrorCode   ierr;
184   PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
185                    m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
186   int              sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
187   PetscMPIInt      size,rank;
188   SuperLUStat_t    stat;
189   double           *berr=0;
190   IS               isrow;
191   PetscLogDouble   time0,time,time_min,time_max;
192   Mat              F_diag=PETSC_NULL;
193 #if defined(PETSC_USE_COMPLEX)
194   doublecomplex    *av, *bv;
195 #else
196   double           *av, *bv;
197 #endif
198 
199   PetscFunctionBegin;
200   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
201   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
202 
203   if (lu->options.PrintStat) { /* collect time for mat conversion */
204     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
205     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
206   }
207 
208   if (lu->MatInputMode == GLOBAL) { /* global mat input */
209     if (size > 1) { /* convert mpi A to seq mat A */
210       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
211       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
212       ierr = ISDestroy(isrow);CHKERRQ(ierr);
213 
214       A_seq = *tseq;
215       ierr = PetscFree(tseq);CHKERRQ(ierr);
216       aa =  (Mat_SeqAIJ*)A_seq->data;
217     } else {
218       aa =  (Mat_SeqAIJ*)A->data;
219     }
220 
221     /* Convert Petsc NR matrix to SuperLU_DIST NC.
222        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
223     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
224       if (lu->FactPattern == SamePattern_SameRowPerm){
225         Destroy_CompCol_Matrix_dist(&lu->A_sup);
226         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
227         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
228       } else {
229         Destroy_CompCol_Matrix_dist(&lu->A_sup);
230         Destroy_LU(N, &lu->grid, &lu->LUstruct);
231         lu->options.Fact = SamePattern;
232       }
233     }
234 #if defined(PETSC_USE_COMPLEX)
235     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
236 #else
237     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
238 #endif
239 
240     /* Create compressed column matrix A_sup. */
241 #if defined(PETSC_USE_COMPLEX)
242     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
243 #else
244     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
245 #endif
246   } else { /* distributed mat input */
247     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
248     aa=(Mat_SeqAIJ*)(mat->A)->data;
249     bb=(Mat_SeqAIJ*)(mat->B)->data;
250     ai=aa->i; aj=aa->j;
251     bi=bb->i; bj=bb->j;
252 #if defined(PETSC_USE_COMPLEX)
253     av=(doublecomplex*)aa->a;
254     bv=(doublecomplex*)bb->a;
255 #else
256     av=aa->a;
257     bv=bb->a;
258 #endif
259     rstart = A->rmap->rstart;
260     nz     = aa->nz + bb->nz;
261     garray = mat->garray;
262 
263     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
264 #if defined(PETSC_USE_COMPLEX)
265       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
266 #else
267       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
268 #endif
269     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
270       if (lu->FactPattern == SamePattern_SameRowPerm){
271         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
272         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
273       } else {
274         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
275         lu->options.Fact = SamePattern;
276       }
277     }
278     nz = 0; irow = rstart;
279     for ( i=0; i<m; i++ ) {
280       lu->row[i] = nz;
281       countA = ai[i+1] - ai[i];
282       countB = bi[i+1] - bi[i];
283       ajj = aj + ai[i];  /* ptr to the beginning of this row */
284       bjj = bj + bi[i];
285 
286       /* B part, smaller col index */
287       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
288       jB = 0;
289       for (j=0; j<countB; j++){
290         jcol = garray[bjj[j]];
291         if (jcol > colA_start) {
292           jB = j;
293           break;
294         }
295         lu->col[nz] = jcol;
296         lu->val[nz++] = *bv++;
297         if (j==countB-1) jB = countB;
298       }
299 
300       /* A part */
301       for (j=0; j<countA; j++){
302         lu->col[nz] = rstart + ajj[j];
303         lu->val[nz++] = *av++;
304       }
305 
306       /* B part, larger col index */
307       for (j=jB; j<countB; j++){
308         lu->col[nz] = garray[bjj[j]];
309         lu->val[nz++] = *bv++;
310       }
311     }
312     lu->row[m] = nz;
313 #if defined(PETSC_USE_COMPLEX)
314     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
315 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
316 #else
317     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
318 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
319 #endif
320   }
321   if (lu->options.PrintStat) {
322     ierr = PetscGetTime(&time);CHKERRQ(ierr);
323     time0 = time - time0;
324   }
325 
326   /* Factor the matrix. */
327   PStatInit(&stat);   /* Initialize the statistics variables. */
328   CHKMEMQ;
329   if (lu->MatInputMode == GLOBAL) { /* global mat input */
330 #if defined(PETSC_USE_COMPLEX)
331     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
332                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
333 #else
334     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
335                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
336 #endif
337   } else { /* distributed mat input */
338 #if defined(PETSC_USE_COMPLEX)
339     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
340 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
341     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
342 #else
343     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
344 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
345     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
346 #endif
347   }
348 
349   if (lu->MatInputMode == GLOBAL && size > 1){
350     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
351   }
352 
353   if (lu->options.PrintStat) {
354     if (size > 1){
355       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
356       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
357       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
358       time = time/size; /* average time */
359       if (!rank) {
360         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
361       }
362     } else {
363       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
364     }
365     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
366   }
367   PStatFree(&stat);
368   if (size > 1){
369     F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
370     F_diag->assembled = PETSC_TRUE;
371   }
372   (F)->assembled    = PETSC_TRUE;
373   (F)->preallocated = PETSC_TRUE;
374   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
375   PetscFunctionReturn(0);
376 }
377 
378 /* Note the Petsc r and c permutations are ignored */
379 #undef __FUNCT__
380 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
381 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
382 {
383   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*) (F)->spptr;
384   PetscInt          M=A->rmap->N,N=A->cmap->N;
385 
386   PetscFunctionBegin;
387   /* Initialize the SuperLU process grid. */
388   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
389 
390   /* Initialize ScalePermstruct and LUstruct. */
391   ScalePermstructInit(M, N, &lu->ScalePermstruct);
392   LUstructInit(M, N, &lu->LUstruct);
393   (F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
394   (F)->ops->solve            = MatSolve_SuperLU_DIST;
395   PetscFunctionReturn(0);
396 }
397 
398 EXTERN_C_BEGIN
399 #undef __FUNCT__
400 #define __FUNCT__ "MatFactorGetSolverPackage_aij_superlu_dist"
401 PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
402 {
403   PetscFunctionBegin;
404   *type = MAT_SOLVER_SUPERLU_DIST;
405   PetscFunctionReturn(0);
406 }
407 EXTERN_C_END
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatGetFactor_aij_superlu_dist"
411 PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
412 {
413   Mat               B;
414   Mat_SuperLU_DIST  *lu;
415   PetscErrorCode    ierr;
416   PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
417   PetscMPIInt       size;
418   superlu_options_t options;
419   PetscTruth        flg;
420   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
421   const char        *prtype[] = {"LargeDiag","NATURAL"};
422   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
423 
424   PetscFunctionBegin;
425   /* Create the factorization matrix */
426   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
427   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
428   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
429   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
430   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
431 
432   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
433   B->ops->view             = MatView_SuperLU_DIST;
434   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);CHKERRQ(ierr);
435   B->factor                = MAT_FACTOR_LU;
436 
437   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
438   B->spptr = lu;
439 
440   /* Set the default input options:
441      options.Fact              = DOFACT;
442      options.Equil             = YES;
443      options.ParSymbFact       = NO;
444      options.ColPerm           = MMD_AT_PLUS_A;
445      options.RowPerm           = LargeDiag;
446      options.ReplaceTinyPivot  = YES;
447      options.IterRefine        = DOUBLE;
448      options.Trans             = NOTRANS;
449      options.SolveInitialized  = NO;
450      options.RefineInitialized = NO;
451      options.PrintStat         = YES;
452   */
453   set_default_options_dist(&options);
454 
455   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
456   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
457   /* Default num of process columns and rows */
458   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
459   if (!lu->npcol) lu->npcol = 1;
460   while (lu->npcol > 0) {
461     lu->nprow = PetscMPIIntCast(size/lu->npcol);
462     if (size == lu->nprow * lu->npcol) break;
463     lu->npcol --;
464   }
465 
466   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
467     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
468     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
469     if (size != lu->nprow * lu->npcol)
470       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
471 
472     lu->MatInputMode = DISTRIBUTED;
473     ierr = PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
474     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
475 
476     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
477     if (!flg) {
478       options.Equil = NO;
479     }
480 
481     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
482     if (flg) {
483       switch (indx) {
484       case 0:
485         options.RowPerm = LargeDiag;
486         break;
487       case 1:
488         options.RowPerm = NOROWPERM;
489         break;
490       }
491     }
492 
493     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
494     if (flg) {
495       switch (indx) {
496       case 0:
497         options.ColPerm = MMD_AT_PLUS_A;
498         break;
499       case 1:
500         options.ColPerm = NATURAL;
501         break;
502       case 2:
503         options.ColPerm = MMD_ATA;
504         break;
505       case 3:
506         options.ColPerm = PARMETIS;
507         break;
508       }
509     }
510 
511     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
512     if (!flg) {
513       options.ReplaceTinyPivot = NO;
514     }
515 
516     options.ParSymbFact = NO;
517     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
518     if (flg){
519 #ifdef PETSC_HAVE_PARMETIS
520       options.ParSymbFact = YES;
521       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
522 #else
523       printf("parsymbfact needs PARMETIS");
524 #endif
525     }
526 
527     lu->FactPattern = SamePattern;
528     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
529     if (flg) {
530       switch (indx) {
531       case 0:
532         lu->FactPattern = SamePattern;
533         break;
534       case 1:
535         lu->FactPattern = SamePattern_SameRowPerm;
536         break;
537       }
538     }
539 
540     options.IterRefine = NOREFINE;
541     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
542     if (flg) {
543       options.IterRefine = DOUBLE;
544     }
545 
546     if (PetscLogPrintInfo) {
547       options.PrintStat = YES;
548     } else {
549       options.PrintStat = NO;
550     }
551     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
552                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
553   PetscOptionsEnd();
554 
555   lu->options             = options;
556   lu->options.Fact        = DOFACT;
557   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
558   *F = B;
559   PetscFunctionReturn(0);
560 }
561 
562 EXTERN_C_BEGIN
563 #undef __FUNCT__
564 #define __FUNCT__ "MatGetFactor_seqaij_superlu_dist"
565 PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
566 {
567   PetscErrorCode ierr;
568 
569   PetscFunctionBegin;
570   ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 EXTERN_C_END
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
578 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
579 {
580   PetscErrorCode ierr;
581 
582   PetscFunctionBegin;
583   ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 EXTERN_C_END
587 
588 #undef __FUNCT__
589 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
590 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
591 {
592   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
593   superlu_options_t options;
594   PetscErrorCode    ierr;
595 
596   PetscFunctionBegin;
597   /* check if matrix is superlu_dist type */
598   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
599 
600   options = lu->options;
601   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
602   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
603   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
604   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
605   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
606   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
607   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
608   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
609   if (options.ColPerm == NATURAL) {
610     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
611   } else if (options.ColPerm == MMD_AT_PLUS_A) {
612     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
613   } else if (options.ColPerm == MMD_ATA) {
614     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
615   } else if (options.ColPerm == PARMETIS) {
616     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
617   } else {
618     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
619   }
620 
621   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);
622 
623   if (lu->FactPattern == SamePattern){
624     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
625   } else {
626     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
627   }
628   PetscFunctionReturn(0);
629 }
630 
631 #undef __FUNCT__
632 #define __FUNCT__ "MatView_SuperLU_DIST"
633 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
634 {
635   PetscErrorCode    ierr;
636   PetscTruth        iascii;
637   PetscViewerFormat format;
638 
639   PetscFunctionBegin;
640   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
641   if (iascii) {
642     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
643     if (format == PETSC_VIEWER_ASCII_INFO) {
644       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
645     }
646   }
647   PetscFunctionReturn(0);
648 }
649 
650 
651 /*MC
652   MAT_SOLVER_SUPERLU_DIST - Parallel direct solver package for LU factorization
653 
654    Works with AIJ matrices
655 
656   Options Database Keys:
657 + -mat_superlu_dist_r <n> - number of rows in processor partition
658 . -mat_superlu_dist_c <n> - number of columns in processor partition
659 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
660 . -mat_superlu_dist_equil - equilibrate the matrix
661 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
662 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
663 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
664 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
665 . -mat_superlu_dist_iterrefine - use iterative refinement
666 - -mat_superlu_dist_statprint - print factorization information
667 
668    Level: beginner
669 
670 .seealso: PCLU
671 
672 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage
673 
674 M*/
675 
676