xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 4482741e5b2e2bbc854fb1f8dba65221386520f2)
1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3         Provides an interface to the SuperLU_DIST_2.0 sparse solver
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14 #include "superlu_zdefs.h"
15 #else
16 #include "superlu_ddefs.h"
17 #endif
18 EXTERN_C_END
19 
20 typedef enum { GLOBAL,DISTRIBUTED
21 } SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   MatStructure            flg;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   /* A few function pointers for inheritance */
42   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
43   int (*MatView)(Mat,PetscViewer);
44   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
45   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
46   int (*MatDestroy)(Mat);
47 
48   /* Flag to clean up (non-global) SuperLU objects during Destroy */
49   PetscTruth CleanUpSuperLU_Dist;
50 } Mat_SuperLU_DIST;
51 
52 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
53 
54 EXTERN_C_BEGIN
55 #undef __FUNCT__
56 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
57 int MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) {
58   int              ierr;
59   Mat              B=*newmat;
60   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
61 
62   PetscFunctionBegin;
63   if (B != A) {
64     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
65   }
66   /* Reset the original function pointers */
67   B->ops->duplicate        = lu->MatDuplicate;
68   B->ops->view             = lu->MatView;
69   B->ops->assemblyend      = lu->MatAssemblyEnd;
70   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
71   B->ops->destroy          = lu->MatDestroy;
72 
73   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
74   ierr = PetscFree(lu);CHKERRQ(ierr);
75 
76   *newmat = B;
77   PetscFunctionReturn(0);
78 }
79 EXTERN_C_END
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
83 int MatDestroy_SuperLU_DIST(Mat A)
84 {
85   int              ierr,size;
86   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
87 
88   PetscFunctionBegin;
89   if (lu->CleanUpSuperLU_Dist) {
90     /* Deallocate SuperLU_DIST storage */
91     if (lu->MatInputMode == GLOBAL) {
92       Destroy_CompCol_Matrix_dist(&lu->A_sup);
93     } else {
94       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
95       if ( lu->options.SolveInitialized ) {
96 #if defined(PETSC_USE_COMPLEX)
97         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
98 #else
99         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
100 #endif
101       }
102     }
103     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
104     ScalePermstructFree(&lu->ScalePermstruct);
105     LUstructFree(&lu->LUstruct);
106 
107     /* Release the SuperLU_DIST process grid. */
108     superlu_gridexit(&lu->grid);
109 
110     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
111   }
112 
113   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
114   if (size == 1) {
115     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
116   } else {
117     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
118   }
119   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
120 
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSolve_SuperLU_DIST"
126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
127 {
128   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
129   int              ierr, size;
130   int              m=A->M, N=A->N;
131   SuperLUStat_t    stat;
132   double           berr[1];
133   PetscScalar      *bptr;
134   int              info, nrhs=1;
135   Vec              x_seq;
136   IS               iden;
137   VecScatter       scat;
138 
139   PetscFunctionBegin;
140   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
141   if (size > 1) {
142     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
143       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
144       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
145       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
146       ierr = ISDestroy(iden);CHKERRQ(ierr);
147 
148       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
149       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
150       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
151     } else { /* distributed mat input */
152       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
153       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
154     }
155   } else { /* size == 1 */
156     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
157     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
158   }
159 
160   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
161 
162   PStatInit(&stat);        /* Initialize the statistics variables. */
163 
164   if (lu->MatInputMode == GLOBAL) {
165 #if defined(PETSC_USE_COMPLEX)
166     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
167                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
168 #else
169     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
170                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
171 #endif
172   } else { /* distributed mat input */
173 #if defined(PETSC_USE_COMPLEX)
174     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
175 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
176     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
177 #else
178     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
179 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
180     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
181 #endif
182   }
183   if (lu->StatPrint) {
184      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
185   }
186   PStatFree(&stat);
187 
188   if (size > 1) {
189     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
190       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
191       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
192       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
193       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
194       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
195     } else {
196       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
197     }
198   } else {
199     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
200   }
201   PetscFunctionReturn(0);
202 }
203 
204 #undef __FUNCT__
205 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
206 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
207 {
208   Mat              *tseq,A_seq = PETSC_NULL;
209   Mat_SeqAIJ       *aa,*bb;
210   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
211   int              M=A->M,N=A->N,info,ierr,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
212                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
213   SuperLUStat_t    stat;
214   double           *berr=0;
215   IS               isrow;
216   PetscLogDouble   time0,time,time_min,time_max;
217 #if defined(PETSC_USE_COMPLEX)
218   doublecomplex    *av, *bv;
219 #else
220   double           *av, *bv;
221 #endif
222 
223   PetscFunctionBegin;
224   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
225   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
226 
227   if (lu->StatPrint) { /* collect time for mat conversion */
228     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
229     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
230   }
231 
232   if (lu->MatInputMode == GLOBAL) { /* global mat input */
233     if (size > 1) { /* convert mpi A to seq mat A */
234       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr);
235       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr);
236       ierr = ISDestroy(isrow);CHKERRQ(ierr);
237 
238       A_seq = *tseq;
239       ierr = PetscFree(tseq);CHKERRQ(ierr);
240       aa =  (Mat_SeqAIJ*)A_seq->data;
241     } else {
242       aa =  (Mat_SeqAIJ*)A->data;
243     }
244 
245     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
246     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
247 #if defined(PETSC_USE_COMPLEX)
248       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
249 #else
250       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
251 #endif
252     } else { /* successive numeric factorization, sparsity pattern is reused. */
253       Destroy_CompCol_Matrix_dist(&lu->A_sup);
254       Destroy_LU(N, &lu->grid, &lu->LUstruct);
255       lu->options.Fact = SamePattern;
256     }
257 #if defined(PETSC_USE_COMPLEX)
258     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
259 #else
260     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
261 #endif
262 
263     /* Create compressed column matrix A_sup. */
264 #if defined(PETSC_USE_COMPLEX)
265     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
266 #else
267     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
268 #endif
269   } else { /* distributed mat input */
270     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
271     aa=(Mat_SeqAIJ*)(mat->A)->data;
272     bb=(Mat_SeqAIJ*)(mat->B)->data;
273     ai=aa->i; aj=aa->j;
274     bi=bb->i; bj=bb->j;
275 #if defined(PETSC_USE_COMPLEX)
276     av=(doublecomplex*)aa->a;
277     bv=(doublecomplex*)bb->a;
278 #else
279     av=aa->a;
280     bv=bb->a;
281 #endif
282     rstart = mat->rstart;
283     nz     = aa->nz + bb->nz;
284     garray = mat->garray;
285     rstart = mat->rstart;
286 
287     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
288 #if defined(PETSC_USE_COMPLEX)
289       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
290 #else
291       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
292 #endif
293     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
294       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
295       Destroy_LU(N, &lu->grid, &lu->LUstruct);
296       lu->options.Fact = SamePattern;
297     }
298     nz = 0; irow = mat->rstart;
299     for ( i=0; i<m; i++ ) {
300       lu->row[i] = nz;
301       countA = ai[i+1] - ai[i];
302       countB = bi[i+1] - bi[i];
303       ajj = aj + ai[i];  /* ptr to the beginning of this row */
304       bjj = bj + bi[i];
305 
306       /* B part, smaller col index */
307       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
308       jB = 0;
309       for (j=0; j<countB; j++){
310         jcol = garray[bjj[j]];
311         if (jcol > colA_start) {
312           jB = j;
313           break;
314         }
315         lu->col[nz] = jcol;
316         lu->val[nz++] = *bv++;
317         if (j==countB-1) jB = countB;
318       }
319 
320       /* A part */
321       for (j=0; j<countA; j++){
322         lu->col[nz] = mat->rstart + ajj[j];
323         lu->val[nz++] = *av++;
324       }
325 
326       /* B part, larger col index */
327       for (j=jB; j<countB; j++){
328         lu->col[nz] = garray[bjj[j]];
329         lu->val[nz++] = *bv++;
330       }
331     }
332     lu->row[m] = nz;
333 #if defined(PETSC_USE_COMPLEX)
334     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
335 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
336 #else
337     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
338 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
339 #endif
340   }
341   if (lu->StatPrint) {
342     ierr = PetscGetTime(&time);CHKERRQ(ierr);
343     time0 = time - time0;
344   }
345 
346   /* Factor the matrix. */
347   PStatInit(&stat);   /* Initialize the statistics variables. */
348 
349   if (lu->MatInputMode == GLOBAL) { /* global mat input */
350 #if defined(PETSC_USE_COMPLEX)
351     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
352                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
353 #else
354     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
355                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
356 #endif
357   } else { /* distributed mat input */
358 #if defined(PETSC_USE_COMPLEX)
359     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
360 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
361     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
362 #else
363     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
364 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
365     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
366 #endif
367   }
368 
369   if (lu->MatInputMode == GLOBAL && size > 1){
370     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
371   }
372 
373   if (lu->StatPrint) {
374     if (size > 1){
375       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
376       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
377       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
378       time = time/size; /* average time */
379       if (!rank)
380         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
381                               %g / %g / %g\n",time_max,time_min,time);
382     } else {
383       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
384                               %g\n",time0);
385     }
386 
387     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
388   }
389   PStatFree(&stat);
390   (*F)->assembled = PETSC_TRUE;
391   lu->flg         = SAME_NONZERO_PATTERN;
392   PetscFunctionReturn(0);
393 }
394 
395 /* Note the Petsc r and c permutations are ignored */
396 #undef __FUNCT__
397 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
398 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
399 {
400   Mat               B;
401   Mat_SuperLU_DIST  *lu;
402   int               ierr,M=A->M,N=A->N,size,indx;
403   superlu_options_t options;
404   PetscTruth        flg;
405   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
406   const char        *prtype[] = {"LargeDiag","NATURAL"};
407 
408   PetscFunctionBegin;
409   /* Create the factorization matrix */
410   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
411   ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr);
412   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
413   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
414 
415   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
416   B->ops->solve            = MatSolve_SuperLU_DIST;
417   B->factor                = FACTOR_LU;
418 
419   lu = (Mat_SuperLU_DIST*)(B->spptr);
420 
421   /* Set the input options */
422   set_default_options_dist(&options);
423   lu->MatInputMode = GLOBAL;
424   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
425 
426   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
427   lu->nprow = size/2;               /* Default process rows.      */
428   if (lu->nprow == 0) lu->nprow = 1;
429   lu->npcol = size/lu->nprow;           /* Default process columns.   */
430 
431   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
432 
433     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
434     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
435     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
436 
437     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
438     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
439 
440     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
441     if (!flg) {
442       options.Equil = NO;
443     }
444 
445     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
446     if (flg) {
447       switch (indx) {
448       case 0:
449         options.RowPerm = LargeDiag;
450         break;
451       case 1:
452         options.RowPerm = NOROWPERM;
453         break;
454       }
455     }
456 
457     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
458     if (flg) {
459       switch (indx) {
460       case 0:
461         options.ColPerm = MMD_AT_PLUS_A;
462         break;
463       case 1:
464         options.ColPerm = NATURAL;
465         break;
466       case 2:
467         options.ColPerm = MMD_ATA;
468         break;
469       case 3:
470         options.ColPerm = COLAMD;
471         break;
472       }
473     }
474 
475     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
476     if (!flg) {
477       options.ReplaceTinyPivot = NO;
478     }
479 
480     options.IterRefine = NOREFINE;
481     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
482     if (flg) {
483       options.IterRefine = DOUBLE;
484     }
485 
486     if (PetscLogPrintInfo) {
487       lu->StatPrint = (int)PETSC_TRUE;
488     } else {
489       lu->StatPrint = (int)PETSC_FALSE;
490     }
491     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
492                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
493   PetscOptionsEnd();
494 
495   /* Initialize the SuperLU process grid. */
496   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
497 
498   /* Initialize ScalePermstruct and LUstruct. */
499   ScalePermstructInit(M, N, &lu->ScalePermstruct);
500   LUstructInit(M, N, &lu->LUstruct);
501 
502   lu->options            = options;
503   lu->flg                = DIFFERENT_NONZERO_PATTERN;
504   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
505   *F = B;
506   PetscFunctionReturn(0);
507 }
508 
509 #undef __FUNCT__
510 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
511 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
512   int              ierr;
513   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
514 
515   PetscFunctionBegin;
516   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
517   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
518   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
524 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
525 {
526   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
527   superlu_options_t options;
528   int               ierr;
529   char              *colperm;
530 
531   PetscFunctionBegin;
532   /* check if matrix is superlu_dist type */
533   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
534 
535   options = lu->options;
536   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
537   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
538   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
539   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
540   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
541   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
542   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
543   if (options.ColPerm == NATURAL) {
544     colperm = "NATURAL";
545   } else if (options.ColPerm == MMD_AT_PLUS_A) {
546     colperm = "MMD_AT_PLUS_A";
547   } else if (options.ColPerm == MMD_ATA) {
548     colperm = "MMD_ATA";
549   } else if (options.ColPerm == COLAMD) {
550     colperm = "COLAMD";
551   } else {
552     SETERRQ(1,"Unknown column permutation");
553   }
554   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
555   PetscFunctionReturn(0);
556 }
557 
558 #undef __FUNCT__
559 #define __FUNCT__ "MatView_SuperLU_DIST"
560 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
561 {
562   int               ierr;
563   PetscTruth        isascii;
564   PetscViewerFormat format;
565   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
566 
567   PetscFunctionBegin;
568   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
569 
570   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
571   if (isascii) {
572     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
573     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
574       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
575     }
576   }
577   PetscFunctionReturn(0);
578 }
579 
580 
581 EXTERN_C_BEGIN
582 #undef __FUNCT__
583 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
584 int MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) {
585   /* This routine is only called to convert to MATSUPERLU_DIST */
586   /* from MATSEQAIJ if A has a single process communicator */
587   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
588   int              ierr, size;
589   MPI_Comm         comm;
590   Mat              B=*newmat;
591   Mat_SuperLU_DIST *lu;
592 
593   PetscFunctionBegin;
594   if (B != A) {
595     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
596   }
597 
598   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
599   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
600 
601   lu->MatDuplicate         = A->ops->duplicate;
602   lu->MatView              = A->ops->view;
603   lu->MatAssemblyEnd       = A->ops->assemblyend;
604   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
605   lu->MatDestroy           = A->ops->destroy;
606   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
607 
608   B->spptr                 = (void*)lu;
609   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
610   B->ops->view             = MatView_SuperLU_DIST;
611   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
612   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
613   B->ops->destroy          = MatDestroy_SuperLU_DIST;
614   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
615   if (size == 1) {
616     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
617                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
618     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
619                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
620   } else {
621     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
622                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
623     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
624                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
625   }
626   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
627   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
628   *newmat = B;
629   PetscFunctionReturn(0);
630 }
631 EXTERN_C_END
632 
633 #undef __FUNCT__
634 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
635 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
636   int              ierr;
637   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
638 
639   PetscFunctionBegin;
640   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
641   ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr);
642   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
643   PetscFunctionReturn(0);
644 }
645 
646 /*MC
647   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
648   via the external package SuperLU_DIST.
649 
650   If SuperLU_DIST is installed (see the manual for
651   instructions on how to declare the existence of external packages),
652   a matrix type can be constructed which invokes SuperLU_DIST solvers.
653   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
654   This matrix type is only supported for double precision real.
655 
656   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
657   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
658   MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
659   for communicators controlling multiple processes.  It is recommended that you call both of
660   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
661   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
662   without data copy.
663 
664   Options Database Keys:
665 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
666 . -mat_superlu_dist_r <n> - number of rows in processor partition
667 . -mat_superlu_dist_c <n> - number of columns in processor partition
668 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
669 . -mat_superlu_dist_equil - equilibrate the matrix
670 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
671 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
672 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
673 . -mat_superlu_dist_iterrefine - use iterative refinement
674 - -mat_superlu_dist_statprint - print factorization information
675 
676    Level: beginner
677 
678 .seealso: PCLU
679 M*/
680 
681 EXTERN_C_BEGIN
682 #undef __FUNCT__
683 #define __FUNCT__ "MatCreate_SuperLU_DIST"
684 int MatCreate_SuperLU_DIST(Mat A) {
685   int ierr,size;
686   Mat A_diag;
687 
688   PetscFunctionBegin;
689   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
690   /*   and SuperLU_DIST types */
691   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
692   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
693   if (size == 1) {
694     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
695   } else {
696     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
697     A_diag = ((Mat_MPIAIJ *)A->data)->A;
698     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr);
699   }
700   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 EXTERN_C_END
704 
705