xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision dfbe8321245cdf9338262824a41580dace4e4143)
1 /*
2         Provides an interface to the SuperLU_DIST_2.0 sparse solver
3 */
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
8 #include "stdlib.h"
9 #endif
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "superlu_zdefs.h"
14 #else
15 #include "superlu_ddefs.h"
16 #endif
17 EXTERN_C_END
18 
19 typedef enum { GLOBAL,DISTRIBUTED
20 } SuperLU_MatInputMode;
21 
22 typedef struct {
23   int_t                   nprow,npcol,*row,*col;
24   gridinfo_t              grid;
25   superlu_options_t       options;
26   SuperMatrix             A_sup;
27   ScalePermstruct_t       ScalePermstruct;
28   LUstruct_t              LUstruct;
29   int                     StatPrint;
30   int                     MatInputMode;
31   SOLVEstruct_t           SOLVEstruct;
32   MatStructure            flg;
33   MPI_Comm                comm_superlu;
34 #if defined(PETSC_USE_COMPLEX)
35   doublecomplex           *val;
36 #else
37   double                  *val;
38 #endif
39 
40   /* A few function pointers for inheritance */
41   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
42   int (*MatView)(Mat,PetscViewer);
43   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
44   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
45   int (*MatDestroy)(Mat);
46 
47   /* Flag to clean up (non-global) SuperLU objects during Destroy */
48   PetscTruth CleanUpSuperLU_Dist;
49 } Mat_SuperLU_DIST;
50 
51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
52 
53 EXTERN_C_BEGIN
54 #undef __FUNCT__
55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) {
57   PetscErrorCode ierr;
58   Mat              B=*newmat;
59   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
60 
61   PetscFunctionBegin;
62   if (B != A) {
63     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
64   }
65   /* Reset the original function pointers */
66   B->ops->duplicate        = lu->MatDuplicate;
67   B->ops->view             = lu->MatView;
68   B->ops->assemblyend      = lu->MatAssemblyEnd;
69   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
70   B->ops->destroy          = lu->MatDestroy;
71 
72   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
73   ierr = PetscFree(lu);CHKERRQ(ierr);
74 
75   *newmat = B;
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
82 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
83 {
84   PetscErrorCode ierr;
85   int              size;
86   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
87 
88   PetscFunctionBegin;
89   if (lu->CleanUpSuperLU_Dist) {
90     /* Deallocate SuperLU_DIST storage */
91     if (lu->MatInputMode == GLOBAL) {
92       Destroy_CompCol_Matrix_dist(&lu->A_sup);
93     } else {
94       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
95       if ( lu->options.SolveInitialized ) {
96 #if defined(PETSC_USE_COMPLEX)
97         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
98 #else
99         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
100 #endif
101       }
102     }
103     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
104     ScalePermstructFree(&lu->ScalePermstruct);
105     LUstructFree(&lu->LUstruct);
106 
107     /* Release the SuperLU_DIST process grid. */
108     superlu_gridexit(&lu->grid);
109 
110     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
111   }
112 
113   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
114   if (size == 1) {
115     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
116   } else {
117     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
118   }
119   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
120 
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSolve_SuperLU_DIST"
126 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
127 {
128   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
129   PetscErrorCode ierr;
130   int              size;
131   int              m=A->M, N=A->N;
132   SuperLUStat_t    stat;
133   double           berr[1];
134   PetscScalar      *bptr;
135   int              info, nrhs=1;
136   Vec              x_seq;
137   IS               iden;
138   VecScatter       scat;
139 
140   PetscFunctionBegin;
141   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
142   if (size > 1) {
143     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
144       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
145       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
146       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
147       ierr = ISDestroy(iden);CHKERRQ(ierr);
148 
149       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
150       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
151       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
152     } else { /* distributed mat input */
153       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
154       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
155     }
156   } else { /* size == 1 */
157     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
158     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
159   }
160 
161   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
162 
163   PStatInit(&stat);        /* Initialize the statistics variables. */
164 
165   if (lu->MatInputMode == GLOBAL) {
166 #if defined(PETSC_USE_COMPLEX)
167     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
168                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
169 #else
170     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
171                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
172 #endif
173   } else { /* distributed mat input */
174 #if defined(PETSC_USE_COMPLEX)
175     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
176 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
177     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
178 #else
179     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
180 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
181     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
182 #endif
183   }
184   if (lu->StatPrint) {
185      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
186   }
187   PStatFree(&stat);
188 
189   if (size > 1) {
190     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
191       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
192       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
193       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
194       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
195       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
196     } else {
197       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
198     }
199   } else {
200     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
201   }
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
207 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
208 {
209   Mat              *tseq,A_seq = PETSC_NULL;
210   Mat_SeqAIJ       *aa,*bb;
211   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
212   int              M=A->M,N=A->N,info,ierr,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
213                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
214   SuperLUStat_t    stat;
215   double           *berr=0;
216   IS               isrow;
217   PetscLogDouble   time0,time,time_min,time_max;
218 #if defined(PETSC_USE_COMPLEX)
219   doublecomplex    *av, *bv;
220 #else
221   double           *av, *bv;
222 #endif
223 
224   PetscFunctionBegin;
225   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
226   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
227 
228   if (lu->StatPrint) { /* collect time for mat conversion */
229     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
230     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
231   }
232 
233   if (lu->MatInputMode == GLOBAL) { /* global mat input */
234     if (size > 1) { /* convert mpi A to seq mat A */
235       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
236       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
237       ierr = ISDestroy(isrow);CHKERRQ(ierr);
238 
239       A_seq = *tseq;
240       ierr = PetscFree(tseq);CHKERRQ(ierr);
241       aa =  (Mat_SeqAIJ*)A_seq->data;
242     } else {
243       aa =  (Mat_SeqAIJ*)A->data;
244     }
245 
246     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
247     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
248 #if defined(PETSC_USE_COMPLEX)
249       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
250 #else
251       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
252 #endif
253     } else { /* successive numeric factorization, sparsity pattern is reused. */
254       Destroy_CompCol_Matrix_dist(&lu->A_sup);
255       Destroy_LU(N, &lu->grid, &lu->LUstruct);
256       lu->options.Fact = SamePattern;
257     }
258 #if defined(PETSC_USE_COMPLEX)
259     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
260 #else
261     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
262 #endif
263 
264     /* Create compressed column matrix A_sup. */
265 #if defined(PETSC_USE_COMPLEX)
266     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
267 #else
268     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
269 #endif
270   } else { /* distributed mat input */
271     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
272     aa=(Mat_SeqAIJ*)(mat->A)->data;
273     bb=(Mat_SeqAIJ*)(mat->B)->data;
274     ai=aa->i; aj=aa->j;
275     bi=bb->i; bj=bb->j;
276 #if defined(PETSC_USE_COMPLEX)
277     av=(doublecomplex*)aa->a;
278     bv=(doublecomplex*)bb->a;
279 #else
280     av=aa->a;
281     bv=bb->a;
282 #endif
283     rstart = mat->rstart;
284     nz     = aa->nz + bb->nz;
285     garray = mat->garray;
286     rstart = mat->rstart;
287 
288     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
289 #if defined(PETSC_USE_COMPLEX)
290       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
291 #else
292       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
293 #endif
294     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
295       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
296       Destroy_LU(N, &lu->grid, &lu->LUstruct);
297       lu->options.Fact = SamePattern;
298     }
299     nz = 0; irow = mat->rstart;
300     for ( i=0; i<m; i++ ) {
301       lu->row[i] = nz;
302       countA = ai[i+1] - ai[i];
303       countB = bi[i+1] - bi[i];
304       ajj = aj + ai[i];  /* ptr to the beginning of this row */
305       bjj = bj + bi[i];
306 
307       /* B part, smaller col index */
308       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
309       jB = 0;
310       for (j=0; j<countB; j++){
311         jcol = garray[bjj[j]];
312         if (jcol > colA_start) {
313           jB = j;
314           break;
315         }
316         lu->col[nz] = jcol;
317         lu->val[nz++] = *bv++;
318         if (j==countB-1) jB = countB;
319       }
320 
321       /* A part */
322       for (j=0; j<countA; j++){
323         lu->col[nz] = mat->rstart + ajj[j];
324         lu->val[nz++] = *av++;
325       }
326 
327       /* B part, larger col index */
328       for (j=jB; j<countB; j++){
329         lu->col[nz] = garray[bjj[j]];
330         lu->val[nz++] = *bv++;
331       }
332     }
333     lu->row[m] = nz;
334 #if defined(PETSC_USE_COMPLEX)
335     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
336 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
337 #else
338     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
339 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
340 #endif
341   }
342   if (lu->StatPrint) {
343     ierr = PetscGetTime(&time);CHKERRQ(ierr);
344     time0 = time - time0;
345   }
346 
347   /* Factor the matrix. */
348   PStatInit(&stat);   /* Initialize the statistics variables. */
349 
350   if (lu->MatInputMode == GLOBAL) { /* global mat input */
351 #if defined(PETSC_USE_COMPLEX)
352     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
353                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
354 #else
355     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
356                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
357 #endif
358   } else { /* distributed mat input */
359 #if defined(PETSC_USE_COMPLEX)
360     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
361 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
362     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
363 #else
364     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
365 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
366     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
367 #endif
368   }
369 
370   if (lu->MatInputMode == GLOBAL && size > 1){
371     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
372   }
373 
374   if (lu->StatPrint) {
375     if (size > 1){
376       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
377       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
378       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
379       time = time/size; /* average time */
380       if (!rank)
381         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
382                               %g / %g / %g\n",time_max,time_min,time);
383     } else {
384       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
385                               %g\n",time0);
386     }
387 
388     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
389   }
390   PStatFree(&stat);
391   (*F)->assembled = PETSC_TRUE;
392   lu->flg         = SAME_NONZERO_PATTERN;
393   PetscFunctionReturn(0);
394 }
395 
396 /* Note the Petsc r and c permutations are ignored */
397 #undef __FUNCT__
398 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
399 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
400 {
401   Mat               B;
402   Mat_SuperLU_DIST  *lu;
403   PetscErrorCode ierr;
404   int               M=A->M,N=A->N,size,indx;
405   superlu_options_t options;
406   PetscTruth        flg;
407   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
408   const char        *prtype[] = {"LargeDiag","NATURAL"};
409 
410   PetscFunctionBegin;
411   /* Create the factorization matrix */
412   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
413   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
414   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
415   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
416 
417   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
418   B->ops->solve            = MatSolve_SuperLU_DIST;
419   B->factor                = FACTOR_LU;
420 
421   lu = (Mat_SuperLU_DIST*)(B->spptr);
422 
423   /* Set the input options */
424   set_default_options_dist(&options);
425   lu->MatInputMode = GLOBAL;
426   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
427 
428   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
429   lu->nprow = size/2;               /* Default process rows.      */
430   if (!lu->nprow) lu->nprow = 1;
431   lu->npcol = size/lu->nprow;           /* Default process columns.   */
432 
433   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
434 
435     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
436     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
437     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
438 
439     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
440     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
441 
442     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
443     if (!flg) {
444       options.Equil = NO;
445     }
446 
447     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
448     if (flg) {
449       switch (indx) {
450       case 0:
451         options.RowPerm = LargeDiag;
452         break;
453       case 1:
454         options.RowPerm = NOROWPERM;
455         break;
456       }
457     }
458 
459     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
460     if (flg) {
461       switch (indx) {
462       case 0:
463         options.ColPerm = MMD_AT_PLUS_A;
464         break;
465       case 1:
466         options.ColPerm = NATURAL;
467         break;
468       case 2:
469         options.ColPerm = MMD_ATA;
470         break;
471       case 3:
472         options.ColPerm = COLAMD;
473         break;
474       }
475     }
476 
477     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
478     if (!flg) {
479       options.ReplaceTinyPivot = NO;
480     }
481 
482     options.IterRefine = NOREFINE;
483     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
484     if (flg) {
485       options.IterRefine = DOUBLE;
486     }
487 
488     if (PetscLogPrintInfo) {
489       lu->StatPrint = (int)PETSC_TRUE;
490     } else {
491       lu->StatPrint = (int)PETSC_FALSE;
492     }
493     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
494                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
495   PetscOptionsEnd();
496 
497   /* Initialize the SuperLU process grid. */
498   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
499 
500   /* Initialize ScalePermstruct and LUstruct. */
501   ScalePermstructInit(M, N, &lu->ScalePermstruct);
502   LUstructInit(M, N, &lu->LUstruct);
503 
504   lu->options            = options;
505   lu->flg                = DIFFERENT_NONZERO_PATTERN;
506   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
507   *F = B;
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
513 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
514   PetscErrorCode ierr;
515   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
516 
517   PetscFunctionBegin;
518   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
519   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
520   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
526 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
527 {
528   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
529   superlu_options_t options;
530   PetscErrorCode ierr;
531   char              *colperm;
532 
533   PetscFunctionBegin;
534   /* check if matrix is superlu_dist type */
535   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
536 
537   options = lu->options;
538   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
539   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
540   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
541   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
542   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
543   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
544   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
545   if (options.ColPerm == NATURAL) {
546     colperm = "NATURAL";
547   } else if (options.ColPerm == MMD_AT_PLUS_A) {
548     colperm = "MMD_AT_PLUS_A";
549   } else if (options.ColPerm == MMD_ATA) {
550     colperm = "MMD_ATA";
551   } else if (options.ColPerm == COLAMD) {
552     colperm = "COLAMD";
553   } else {
554     SETERRQ(1,"Unknown column permutation");
555   }
556   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatView_SuperLU_DIST"
562 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
563 {
564   PetscErrorCode ierr;
565   PetscTruth        iascii;
566   PetscViewerFormat format;
567   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
568 
569   PetscFunctionBegin;
570   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
571 
572   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
573   if (iascii) {
574     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
575     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
576       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 
583 EXTERN_C_BEGIN
584 #undef __FUNCT__
585 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
586 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) {
587   /* This routine is only called to convert to MATSUPERLU_DIST */
588   /* from MATSEQAIJ if A has a single process communicator */
589   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
590   PetscErrorCode ierr;
591   int              size;
592   MPI_Comm         comm;
593   Mat              B=*newmat;
594   Mat_SuperLU_DIST *lu;
595 
596   PetscFunctionBegin;
597   if (B != A) {
598     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
599   }
600 
601   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
602   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
603 
604   lu->MatDuplicate         = A->ops->duplicate;
605   lu->MatView              = A->ops->view;
606   lu->MatAssemblyEnd       = A->ops->assemblyend;
607   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
608   lu->MatDestroy           = A->ops->destroy;
609   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
610 
611   B->spptr                 = (void*)lu;
612   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
613   B->ops->view             = MatView_SuperLU_DIST;
614   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
615   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
616   B->ops->destroy          = MatDestroy_SuperLU_DIST;
617   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
618   if (size == 1) {
619     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
620                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
621     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
622                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
623   } else {
624     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
625                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
626     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
627                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
628   }
629   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
630   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
631   *newmat = B;
632   PetscFunctionReturn(0);
633 }
634 EXTERN_C_END
635 
636 #undef __FUNCT__
637 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
638 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
639   PetscErrorCode ierr;
640   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
641 
642   PetscFunctionBegin;
643   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
644   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
645   PetscFunctionReturn(0);
646 }
647 
648 /*MC
649   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
650   via the external package SuperLU_DIST.
651 
652   If SuperLU_DIST is installed (see the manual for
653   instructions on how to declare the existence of external packages),
654   a matrix type can be constructed which invokes SuperLU_DIST solvers.
655   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
656   This matrix type is only supported for double precision real.
657 
658   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
659   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
660   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
661   for communicators controlling multiple processes.  It is recommended that you call both of
662   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
663   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
664   without data copy.
665 
666   Options Database Keys:
667 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
668 . -mat_superlu_dist_r <n> - number of rows in processor partition
669 . -mat_superlu_dist_c <n> - number of columns in processor partition
670 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
671 . -mat_superlu_dist_equil - equilibrate the matrix
672 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
673 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
674 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
675 . -mat_superlu_dist_iterrefine - use iterative refinement
676 - -mat_superlu_dist_statprint - print factorization information
677 
678    Level: beginner
679 
680 .seealso: PCLU
681 M*/
682 
683 EXTERN_C_BEGIN
684 #undef __FUNCT__
685 #define __FUNCT__ "MatCreate_SuperLU_DIST"
686 PetscErrorCode MatCreate_SuperLU_DIST(Mat A)
687 {
688   PetscErrorCode ierr,size;
689   Mat A_diag;
690 
691   PetscFunctionBegin;
692   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
693   /*   and SuperLU_DIST types */
694   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
695   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
696   if (size == 1) {
697     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
698   } else {
699     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
700     A_diag = ((Mat_MPIAIJ *)A->data)->A;
701     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr);
702   }
703   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 EXTERN_C_END
707 
708