xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision b2da817d11b8f98ec4a15fcfa65f2fa8cfedc826)
1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3         Provides an interface to the SuperLU_DIST_2.0 sparse solver
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14 #include "superlu_zdefs.h"
15 #else
16 #include "superlu_ddefs.h"
17 #endif
18 EXTERN_C_END
19 
20 typedef enum { GLOBAL,DISTRIBUTED
21 } SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   MatStructure            flg;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   /* A few function pointers for inheritance */
42   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
43   int (*MatView)(Mat,PetscViewer);
44   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
45   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
46   int (*MatDestroy)(Mat);
47 
48   /* Flag to clean up (non-global) SuperLU objects during Destroy */
49   PetscTruth CleanUpSuperLU_Dist;
50 } Mat_SuperLU_DIST;
51 
52 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
53 
54 EXTERN_C_BEGIN
55 #undef __FUNCT__
56 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
57 int MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) {
58   int              ierr;
59   Mat              B=*newmat;
60   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
61 
62   PetscFunctionBegin;
63   if (B != A) {
64     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
65   }
66   /* Reset the original function pointers */
67   B->ops->duplicate        = lu->MatDuplicate;
68   B->ops->view             = lu->MatView;
69   B->ops->assemblyend      = lu->MatAssemblyEnd;
70   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
71   B->ops->destroy          = lu->MatDestroy;
72 
73   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
74   ierr = PetscFree(lu);CHKERRQ(ierr);
75 
76   *newmat = B;
77   PetscFunctionReturn(0);
78 }
79 EXTERN_C_END
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
83 int MatDestroy_SuperLU_DIST(Mat A)
84 {
85   int              ierr,size;
86   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
87 
88   PetscFunctionBegin;
89   if (lu->CleanUpSuperLU_Dist) {
90     /* Deallocate SuperLU_DIST storage */
91     if (lu->MatInputMode == GLOBAL) {
92       Destroy_CompCol_Matrix_dist(&lu->A_sup);
93     } else {
94       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
95       if ( lu->options.SolveInitialized ) {
96 #if defined(PETSC_USE_COMPLEX)
97         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
98 #else
99         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
100 #endif
101       }
102     }
103     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
104     ScalePermstructFree(&lu->ScalePermstruct);
105     LUstructFree(&lu->LUstruct);
106 
107     /* Release the SuperLU_DIST process grid. */
108     superlu_gridexit(&lu->grid);
109 
110     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
111   }
112 
113   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
114   if (size == 1) {
115     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
116   } else {
117     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
118   }
119   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
120 
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSolve_SuperLU_DIST"
126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
127 {
128   Mat_MPIAIJ       *aa = (Mat_MPIAIJ*)A->data;
129   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
130   int              ierr, size;
131   int              m=A->M, N=A->N;
132   SuperLUStat_t    stat;
133   double           berr[1];
134   PetscScalar      *bptr;
135   int              info, nrhs=1;
136   Vec              x_seq;
137   IS               iden;
138   VecScatter       scat;
139 
140   PetscFunctionBegin;
141   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
142   if (size > 1) {
143     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
144       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
145       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
146       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
147       ierr = ISDestroy(iden);CHKERRQ(ierr);
148 
149       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
150       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
151       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
152     } else { /* distributed mat input */
153       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
154       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
155     }
156   } else { /* size == 1 */
157     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
158     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
159   }
160 
161   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
162 
163   PStatInit(&stat);        /* Initialize the statistics variables. */
164 
165   if (lu->MatInputMode == GLOBAL) {
166 #if defined(PETSC_USE_COMPLEX)
167     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
168                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
169 #else
170     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
171                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
172 #endif
173   } else { /* distributed mat input */
174 #if defined(PETSC_USE_COMPLEX)
175     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
176 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
177     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
178 #else
179     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
180 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
181     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
182 #endif
183   }
184   if (lu->StatPrint) {
185      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
186   }
187   PStatFree(&stat);
188 
189   if (size > 1) {
190     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
191       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
192       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
193       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
194       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
195       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
196     } else {
197       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
198     }
199   } else {
200     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
201   }
202   PetscFunctionReturn(0);
203 }
204 
205 #undef __FUNCT__
206 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
207 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
208 {
209   Mat_MPIAIJ       *fac = (Mat_MPIAIJ*)(*F)->data,*mat;
210   Mat              *tseq,A_seq = PETSC_NULL;
211   Mat_SeqAIJ       *aa,*bb;
212   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
213   int              M=A->M,N=A->N,info,ierr,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
214                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
215   SuperLUStat_t    stat;
216   double           *berr=0;
217   IS               isrow;
218   PetscLogDouble   time0,time,time_min,time_max;
219 #if defined(PETSC_USE_COMPLEX)
220   doublecomplex    *av, *bv;
221 #else
222   double           *av, *bv;
223 #endif
224 
225   PetscFunctionBegin;
226   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
227   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
228 
229   if (lu->StatPrint) { /* collect time for mat conversion */
230     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
231     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
232   }
233 
234   if (lu->MatInputMode == GLOBAL) { /* global mat input */
235     if (size > 1) { /* convert mpi A to seq mat A */
236       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr);
237       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr);
238       ierr = ISDestroy(isrow);CHKERRQ(ierr);
239 
240       A_seq = *tseq;
241       ierr = PetscFree(tseq);CHKERRQ(ierr);
242       aa =  (Mat_SeqAIJ*)A_seq->data;
243     } else {
244       aa =  (Mat_SeqAIJ*)A->data;
245     }
246 
247     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
248     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
249 #if defined(PETSC_USE_COMPLEX)
250       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
251 #else
252       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
253 #endif
254     } else { /* successive numeric factorization, sparsity pattern is reused. */
255       Destroy_CompCol_Matrix_dist(&lu->A_sup);
256       Destroy_LU(N, &lu->grid, &lu->LUstruct);
257       lu->options.Fact = SamePattern;
258     }
259 #if defined(PETSC_USE_COMPLEX)
260     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
261 #else
262     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
263 #endif
264 
265     /* Create compressed column matrix A_sup. */
266 #if defined(PETSC_USE_COMPLEX)
267     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
268 #else
269     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
270 #endif
271   } else { /* distributed mat input */
272     mat =  (Mat_MPIAIJ*)A->data;
273     aa=(Mat_SeqAIJ*)(mat->A)->data;
274     bb=(Mat_SeqAIJ*)(mat->B)->data;
275     ai=aa->i; aj=aa->j;
276     bi=bb->i; bj=bb->j;
277 #if defined(PETSC_USE_COMPLEX)
278     av=(doublecomplex*)aa->a;
279     bv=(doublecomplex*)bb->a;
280 #else
281     av=aa->a;
282     bv=bb->a;
283 #endif
284     rstart = mat->rstart;
285     nz     = aa->nz + bb->nz;
286     garray = mat->garray;
287     rstart = mat->rstart;
288 
289     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
290 #if defined(PETSC_USE_COMPLEX)
291       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
292 #else
293       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
294 #endif
295     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
296       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
297       Destroy_LU(N, &lu->grid, &lu->LUstruct);
298       lu->options.Fact = SamePattern;
299     }
300     nz = 0; irow = mat->rstart;
301     for ( i=0; i<m; i++ ) {
302       lu->row[i] = nz;
303       countA = ai[i+1] - ai[i];
304       countB = bi[i+1] - bi[i];
305       ajj = aj + ai[i];  /* ptr to the beginning of this row */
306       bjj = bj + bi[i];
307 
308       /* B part, smaller col index */
309       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
310       jB = 0;
311       for (j=0; j<countB; j++){
312         jcol = garray[bjj[j]];
313         if (jcol > colA_start) {
314           jB = j;
315           break;
316         }
317         lu->col[nz] = jcol;
318         lu->val[nz++] = *bv++;
319         if (j==countB-1) jB = countB;
320       }
321 
322       /* A part */
323       for (j=0; j<countA; j++){
324         lu->col[nz] = mat->rstart + ajj[j];
325         lu->val[nz++] = *av++;
326       }
327 
328       /* B part, larger col index */
329       for (j=jB; j<countB; j++){
330         lu->col[nz] = garray[bjj[j]];
331         lu->val[nz++] = *bv++;
332       }
333     }
334     lu->row[m] = nz;
335 #if defined(PETSC_USE_COMPLEX)
336     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
337 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
338 #else
339     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
340 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
341 #endif
342   }
343   if (lu->StatPrint) {
344     ierr = PetscGetTime(&time);CHKERRQ(ierr);
345     time0 = time - time0;
346   }
347 
348   /* Factor the matrix. */
349   PStatInit(&stat);   /* Initialize the statistics variables. */
350 
351   if (lu->MatInputMode == GLOBAL) { /* global mat input */
352 #if defined(PETSC_USE_COMPLEX)
353     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
354                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
355 #else
356     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
357                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
358 #endif
359   } else { /* distributed mat input */
360 #if defined(PETSC_USE_COMPLEX)
361     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
362 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
363     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
364 #else
365     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
366 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
367     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
368 #endif
369   }
370 
371   if (lu->MatInputMode == GLOBAL && size > 1){
372     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
373   }
374 
375   if (lu->StatPrint) {
376     if (size > 1){
377       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
378       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
379       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
380       time = time/size; /* average time */
381       if (!rank)
382         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
383                               %g / %g / %g\n",time_max,time_min,time);
384     } else {
385       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
386                               %g\n",time0);
387     }
388 
389     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
390   }
391   PStatFree(&stat);
392   (*F)->assembled = PETSC_TRUE;
393   lu->flg         = SAME_NONZERO_PATTERN;
394   PetscFunctionReturn(0);
395 }
396 
397 /* Note the Petsc r and c permutations are ignored */
398 #undef __FUNCT__
399 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
400 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
401 {
402   Mat               B;
403   Mat_SuperLU_DIST  *lu;
404   int               ierr,M=A->M,N=A->N,size,indx;
405   superlu_options_t options;
406   PetscTruth        flg;
407   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
408   const char        *prtype[] = {"LargeDiag","NATURAL"};
409 
410   PetscFunctionBegin;
411   /* Create the factorization matrix */
412   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
413   ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr);
414   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
415   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
416 
417   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
418   B->ops->solve            = MatSolve_SuperLU_DIST;
419   B->factor                = FACTOR_LU;
420 
421   lu = (Mat_SuperLU_DIST*)(B->spptr);
422 
423   /* Set the input options */
424   set_default_options_dist(&options);
425   lu->MatInputMode = GLOBAL;
426   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
427 
428   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
429   lu->nprow = size/2;               /* Default process rows.      */
430   if (lu->nprow == 0) lu->nprow = 1;
431   lu->npcol = size/lu->nprow;           /* Default process columns.   */
432 
433   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
434 
435     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
436     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
437     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
438 
439     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
440     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
441 
442     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
443     if (!flg) {
444       options.Equil = NO;
445     }
446 
447     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
448     if (flg) {
449       switch (indx) {
450       case 0:
451         options.RowPerm = LargeDiag;
452         break;
453       case 1:
454         options.RowPerm = NOROWPERM;
455         break;
456       }
457     }
458 
459     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
460     if (flg) {
461       switch (indx) {
462       case 0:
463         options.ColPerm = MMD_AT_PLUS_A;
464         break;
465       case 1:
466         options.ColPerm = NATURAL;
467         break;
468       case 2:
469         options.ColPerm = MMD_ATA;
470         break;
471       case 3:
472         options.ColPerm = COLAMD;
473         break;
474       }
475     }
476 
477     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
478     if (!flg) {
479       options.ReplaceTinyPivot = NO;
480     }
481 
482     options.IterRefine = NOREFINE;
483     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
484     if (flg) {
485       options.IterRefine = DOUBLE;
486     }
487 
488     if (PetscLogPrintInfo) {
489       lu->StatPrint = (int)PETSC_TRUE;
490     } else {
491       lu->StatPrint = (int)PETSC_FALSE;
492     }
493     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
494                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
495   PetscOptionsEnd();
496 
497   /* Initialize the SuperLU process grid. */
498   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
499 
500   /* Initialize ScalePermstruct and LUstruct. */
501   ScalePermstructInit(M, N, &lu->ScalePermstruct);
502   LUstructInit(M, N, &lu->LUstruct);
503 
504   lu->options            = options;
505   lu->flg                = DIFFERENT_NONZERO_PATTERN;
506   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
507   *F = B;
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
513 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
514   int              ierr;
515   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
516 
517   PetscFunctionBegin;
518   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
519   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
520   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
526 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
527 {
528   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
529   superlu_options_t options;
530   int               ierr;
531   char              *colperm;
532 
533   PetscFunctionBegin;
534   /* check if matrix is superlu_dist type */
535   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
536 
537   options = lu->options;
538   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
539   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
540   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
541   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
542   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
543   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
544   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
545   if (options.ColPerm == NATURAL) {
546     colperm = "NATURAL";
547   } else if (options.ColPerm == MMD_AT_PLUS_A) {
548     colperm = "MMD_AT_PLUS_A";
549   } else if (options.ColPerm == MMD_ATA) {
550     colperm = "MMD_ATA";
551   } else if (options.ColPerm == COLAMD) {
552     colperm = "COLAMD";
553   } else {
554     SETERRQ(1,"Unknown column permutation");
555   }
556   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
557   PetscFunctionReturn(0);
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatView_SuperLU_DIST"
562 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
563 {
564   int               ierr;
565   PetscTruth        isascii;
566   PetscViewerFormat format;
567   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
568 
569   PetscFunctionBegin;
570   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
571 
572   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
573   if (isascii) {
574     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
575     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
576       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
577     }
578   }
579   PetscFunctionReturn(0);
580 }
581 
582 
583 EXTERN_C_BEGIN
584 #undef __FUNCT__
585 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
586 int MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) {
587   /* This routine is only called to convert to MATSUPERLU_DIST */
588   /* from MATSEQAIJ if A has a single process communicator */
589   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
590   int              ierr, size;
591   MPI_Comm         comm;
592   Mat              B=*newmat;
593   Mat_SuperLU_DIST *lu;
594 
595   PetscFunctionBegin;
596   if (B != A) {
597     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
598   }
599 
600   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
601   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
602 
603   lu->MatDuplicate         = A->ops->duplicate;
604   lu->MatView              = A->ops->view;
605   lu->MatAssemblyEnd       = A->ops->assemblyend;
606   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
607   lu->MatDestroy           = A->ops->destroy;
608   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
609 
610   B->spptr                 = (void*)lu;
611   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
612   B->ops->view             = MatView_SuperLU_DIST;
613   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
614   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
615   B->ops->destroy          = MatDestroy_SuperLU_DIST;
616   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
617   if (size == 1) {
618     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
619                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
620     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
621                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
622   } else {
623     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
624                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
625     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
626                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
627   }
628   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
629   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
630   *newmat = B;
631   PetscFunctionReturn(0);
632 }
633 EXTERN_C_END
634 
635 #undef __FUNCT__
636 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
637 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
638   int              ierr;
639   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
640 
641   PetscFunctionBegin;
642   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
643   ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr);
644   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
645   PetscFunctionReturn(0);
646 }
647 
648 /*MC
649   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
650   via the external package SuperLU_DIST.
651 
652   If SuperLU_DIST is installed (see the manual for
653   instructions on how to declare the existence of external packages),
654   a matrix type can be constructed which invokes SuperLU_DIST solvers.
655   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
656   This matrix type is only supported for double precision real.
657 
658   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
659   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
660   MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
661   for communicators controlling multiple processes.  It is recommended that you call both of
662   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
663   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
664   without data copy.
665 
666   Options Database Keys:
667 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
668 . -mat_superlu_dist_r <n> - number of rows in processor partition
669 . -mat_superlu_dist_c <n> - number of columns in processor partition
670 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
671 . -mat_superlu_dist_equil - equilibrate the matrix
672 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
673 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
674 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
675 . -mat_superlu_dist_iterrefine - use iterative refinement
676 - -mat_superlu_dist_statprint - print factorization information
677 
678    Level: beginner
679 
680 .seealso: PCLU
681 M*/
682 
683 EXTERN_C_BEGIN
684 #undef __FUNCT__
685 #define __FUNCT__ "MatCreate_SuperLU_DIST"
686 int MatCreate_SuperLU_DIST(Mat A) {
687   int ierr,size;
688 
689   PetscFunctionBegin;
690   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
691   /*   and SuperLU_DIST types */
692   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
693   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
694   if (size == 1) {
695     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
696   } else {
697     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
698   }
699   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
700   PetscFunctionReturn(0);
701 }
702 EXTERN_C_END
703 
704