xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision e249d750e7b7b0c489f97a51fdf6db4e4d73d5c7)
1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3         Provides an interface to the SuperLU_DIST_2.0 sparse solver
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14 #include "superlu_zdefs.h"
15 #else
16 #include "superlu_ddefs.h"
17 #endif
18 EXTERN_C_END
19 
20 typedef enum { GLOBAL,DISTRIBUTED
21 } SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   MatStructure            flg;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   int size;
42   /* A few function pointers for inheritance */
43   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   int (*MatView)(Mat,PetscViewer);
45   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   int (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) {
59   int              ierr;
60   Mat              B=*newmat;
61   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
62 
63   PetscFunctionBegin;
64   if (B != A) {
65     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
66   }
67   /* Reset the original function pointers */
68   B->ops->duplicate        = lu->MatDuplicate;
69   B->ops->view             = lu->MatView;
70   B->ops->assemblyend      = lu->MatAssemblyEnd;
71   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
72   B->ops->destroy          = lu->MatDestroy;
73 
74   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   *newmat = B;
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
84 int MatDestroy_SuperLU_DIST(Mat A)
85 {
86   int              ierr;
87   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
88 
89   PetscFunctionBegin;
90   if (lu->CleanUpSuperLU_Dist) {
91     /* Deallocate SuperLU_DIST storage */
92     if (lu->MatInputMode == GLOBAL) {
93       Destroy_CompCol_Matrix_dist(&lu->A_sup);
94     } else {
95       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
96       if ( lu->options.SolveInitialized ) {
97 #if defined(PETSC_USE_COMPLEX)
98         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
99 #else
100         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
101 #endif
102       }
103     }
104     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
105     ScalePermstructFree(&lu->ScalePermstruct);
106     LUstructFree(&lu->LUstruct);
107 
108     /* Release the SuperLU_DIST process grid. */
109     superlu_gridexit(&lu->grid);
110 
111     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
112   }
113 
114   if (lu->size == 1) {
115     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
116   } else {
117     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
118   }
119   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
120 
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSolve_SuperLU_DIST"
126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
127 {
128   Mat_MPIAIJ       *aa = (Mat_MPIAIJ*)A->data;
129   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
130   int              ierr, size=aa->size;
131   int              m=A->M, N=A->N;
132   SuperLUStat_t    stat;
133   double           berr[1];
134   PetscScalar      *bptr;
135   int              info, nrhs=1;
136   Vec              x_seq;
137   IS               iden;
138   VecScatter       scat;
139   PetscLogDouble   time0,time,time_min,time_max;
140 
141   PetscFunctionBegin;
142   if (size > 1) {
143     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
144       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
145       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
146       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
147       ierr = ISDestroy(iden);CHKERRQ(ierr);
148 
149       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
150       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
151       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
152     } else { /* distributed mat input */
153       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
154       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
155     }
156   } else { /* size == 1 */
157     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
158     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
159   }
160 
161   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
162 
163   PStatInit(&stat);        /* Initialize the statistics variables. */
164   if (lu->StatPrint) {
165     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */
166     ierr = PetscGetTime(&time0);CHKERRQ(ierr);  /* to be removed */
167   }
168   if (lu->MatInputMode == GLOBAL) {
169 #if defined(PETSC_USE_COMPLEX)
170     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
171                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
172 #else
173     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
174                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
175 #endif
176   } else { /* distributed mat input */
177 #if defined(PETSC_USE_COMPLEX)
178     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
179 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
180     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
181 #else
182     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
183 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
184     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
185 #endif
186   }
187   if (lu->StatPrint) {
188     ierr = PetscGetTime(&time);CHKERRQ(ierr);  /* to be removed */
189      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
190   }
191   PStatFree(&stat);
192 
193   if (size > 1) {
194     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
195       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
196       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
197       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
198       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
199       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
200     } else {
201       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
202     }
203   } else {
204     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
205   }
206   if (lu->StatPrint) {
207     time0 = time - time0;
208     ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr);
209     ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr);
210     ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr);
211     time = time/size; /* average time */
212     ierr = PetscPrintf(A->comm, "  Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr);
213   }
214   PetscFunctionReturn(0);
215 }
216 
217 #undef __FUNCT__
218 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
219 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
220 {
221   Mat_MPIAIJ       *fac = (Mat_MPIAIJ*)(*F)->data,*mat;
222   Mat              *tseq,A_seq = PETSC_NULL;
223   Mat_SeqAIJ       *aa,*bb;
224   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225   int              M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
226                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
227   SuperLUStat_t    stat;
228   double           *berr=0;
229   IS               isrow;
230   PetscLogDouble   time0[2],time[2],time_min[2],time_max[2];
231 #if defined(PETSC_USE_COMPLEX)
232   doublecomplex    *av, *bv;
233 #else
234   double           *av, *bv;
235 #endif
236 
237   PetscFunctionBegin;
238   if (lu->StatPrint) {
239     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
240     ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr);
241   }
242 
243   if (lu->MatInputMode == GLOBAL) { /* global mat input */
244     if (size > 1) { /* convert mpi A to seq mat A */
245       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr);
246       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr);
247       ierr = ISDestroy(isrow);CHKERRQ(ierr);
248 
249       A_seq = *tseq;
250       ierr = PetscFree(tseq);CHKERRQ(ierr);
251       aa =  (Mat_SeqAIJ*)A_seq->data;
252     } else {
253       aa =  (Mat_SeqAIJ*)A->data;
254     }
255 
256     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
257     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
258 #if defined(PETSC_USE_COMPLEX)
259       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
260 #else
261       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
262 #endif
263     } else { /* successive numeric factorization, sparsity pattern is reused. */
264       Destroy_CompCol_Matrix_dist(&lu->A_sup);
265       Destroy_LU(N, &lu->grid, &lu->LUstruct);
266       lu->options.Fact = SamePattern;
267     }
268 #if defined(PETSC_USE_COMPLEX)
269     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
270 #else
271     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
272 #endif
273 
274     /* Create compressed column matrix A_sup. */
275 #if defined(PETSC_USE_COMPLEX)
276     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
277 #else
278     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
279 #endif
280   } else { /* distributed mat input */
281     mat =  (Mat_MPIAIJ*)A->data;
282     aa=(Mat_SeqAIJ*)(mat->A)->data;
283     bb=(Mat_SeqAIJ*)(mat->B)->data;
284     ai=aa->i; aj=aa->j;
285     bi=bb->i; bj=bb->j;
286 #if defined(PETSC_USE_COMPLEX)
287     av=(doublecomplex*)aa->a;
288     bv=(doublecomplex*)bb->a;
289 #else
290     av=aa->a;
291     bv=bb->a;
292 #endif
293     rstart = mat->rstart;
294     nz     = aa->nz + bb->nz;
295     garray = mat->garray;
296     rstart = mat->rstart;
297 
298     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
299 #if defined(PETSC_USE_COMPLEX)
300       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #else
302       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
303 #endif
304     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
305       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
306       Destroy_LU(N, &lu->grid, &lu->LUstruct);
307       lu->options.Fact = SamePattern;
308     }
309     nz = 0; irow = mat->rstart;
310     for ( i=0; i<m; i++ ) {
311       lu->row[i] = nz;
312       countA = ai[i+1] - ai[i];
313       countB = bi[i+1] - bi[i];
314       ajj = aj + ai[i];  /* ptr to the beginning of this row */
315       bjj = bj + bi[i];
316 
317       /* B part, smaller col index */
318       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
319       jB = 0;
320       for (j=0; j<countB; j++){
321         jcol = garray[bjj[j]];
322         if (jcol > colA_start) {
323           jB = j;
324           break;
325         }
326         lu->col[nz] = jcol;
327         lu->val[nz++] = *bv++;
328         if (j==countB-1) jB = countB;
329       }
330 
331       /* A part */
332       for (j=0; j<countA; j++){
333         lu->col[nz] = mat->rstart + ajj[j];
334         lu->val[nz++] = *av++;
335       }
336 
337       /* B part, larger col index */
338       for (j=jB; j<countB; j++){
339         lu->col[nz] = garray[bjj[j]];
340         lu->val[nz++] = *bv++;
341       }
342     }
343     lu->row[m] = nz;
344 #if defined(PETSC_USE_COMPLEX)
345     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
346 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
347 #else
348     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
349 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
350 #endif
351   }
352   if (lu->StatPrint) {
353     ierr = PetscGetTime(&time[0]);CHKERRQ(ierr);
354     time0[0] = time[0] - time0[0];
355   }
356 
357   /* Factor the matrix. */
358   PStatInit(&stat);   /* Initialize the statistics variables. */
359 
360   if (lu->StatPrint) {
361     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
362     ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr);
363   }
364 
365   if (lu->MatInputMode == GLOBAL) { /* global mat input */
366 #if defined(PETSC_USE_COMPLEX)
367     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
368                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
369 #else
370     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
371                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
372 #endif
373   } else { /* distributed mat input */
374 #if defined(PETSC_USE_COMPLEX)
375     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
376 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
377     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
378 #else
379     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
380 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
381     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
382 #endif
383   }
384   if (lu->StatPrint) {
385     ierr = PetscGetTime(&time[1]);CHKERRQ(ierr);  /* to be removed */
386     time0[1] = time[1] - time0[1];
387     if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
388   }
389   PStatFree(&stat);
390 
391   if (lu->MatInputMode == GLOBAL && size > 1){
392     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
393   }
394 
395   if (lu->StatPrint) {
396     ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
397     ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
398     ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
399     for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
400     ierr = PetscPrintf(A->comm, "  Time for mat conversion (max/min/avg):    %g / %g / %g\n",time_max[0],time_min[0],time[0]);
401     ierr = PetscPrintf(A->comm, "  Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]);
402   }
403   (*F)->assembled = PETSC_TRUE;
404   lu->flg         = SAME_NONZERO_PATTERN;
405   PetscFunctionReturn(0);
406 }
407 
408 /* Note the Petsc r and c permutations are ignored */
409 #undef __FUNCT__
410 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
411 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
412 {
413   Mat               B;
414   Mat_SuperLU_DIST  *lu;
415   int               ierr,M=A->M,N=A->N,size;
416   superlu_options_t options;
417   char              buff[32];
418   PetscTruth        flg;
419   char              *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
420   char              *prtype[] = {"LargeDiag","NATURAL"};
421 
422   PetscFunctionBegin;
423   /* Create the factorization matrix */
424   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
425   ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr);
426   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
427   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
428 
429   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
430   B->ops->solve            = MatSolve_SuperLU_DIST;
431   B->factor                = FACTOR_LU;
432 
433   lu = (Mat_SuperLU_DIST*)(B->spptr);
434 
435   /* Set the input options */
436   set_default_options(&options);
437   lu->MatInputMode = GLOBAL;
438   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
439 
440   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
441   lu->nprow = size/2;               /* Default process rows.      */
442   if (lu->nprow == 0) lu->nprow = 1;
443   lu->npcol = size/lu->nprow;           /* Default process columns.   */
444 
445   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
446 
447     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
448     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
449     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
450 
451     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
452     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
453 
454     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
455     if (!flg) {
456       options.Equil = NO;
457     }
458 
459     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr);
460     while (flg) {
461       ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr);
462       if (flg) {
463         options.RowPerm = LargeDiag;
464         break;
465       }
466       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
467       if (flg) {
468         options.RowPerm = NOROWPERM;
469         break;
470       }
471       SETERRQ1(1,"Unknown row permutation %s",buff);
472     }
473 
474     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr);
475     while (flg) {
476       ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr);
477       if (flg) {
478         options.ColPerm = MMD_AT_PLUS_A;
479         break;
480       }
481       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
482       if (flg) {
483         options.ColPerm = NATURAL;
484         break;
485       }
486       ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr);
487       if (flg) {
488         options.ColPerm = MMD_ATA;
489         break;
490       }
491       ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr);
492       if (flg) {
493         options.ColPerm = COLAMD;
494         break;
495       }
496       SETERRQ1(1,"Unknown column permutation %s",buff);
497     }
498 
499     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
500     if (!flg) {
501       options.ReplaceTinyPivot = NO;
502     }
503 
504     options.IterRefine = NOREFINE;
505     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
506     if (flg) {
507       options.IterRefine = DOUBLE;
508     }
509 
510     if (PetscLogPrintInfo) {
511       lu->StatPrint = (int)PETSC_TRUE;
512     } else {
513       lu->StatPrint = (int)PETSC_FALSE;
514     }
515     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
516                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
517   PetscOptionsEnd();
518 
519   /* Initialize the SuperLU process grid. */
520   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
521 
522   /* Initialize ScalePermstruct and LUstruct. */
523   ScalePermstructInit(M, N, &lu->ScalePermstruct);
524   LUstructInit(M, N, &lu->LUstruct);
525 
526   lu->options            = options;
527   lu->flg                = DIFFERENT_NONZERO_PATTERN;
528   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
529   *F = B;
530   PetscFunctionReturn(0);
531 }
532 
533 #undef __FUNCT__
534 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
535 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
536   int              ierr;
537   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
538 
539   PetscFunctionBegin;
540   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
541   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
542   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
543   PetscFunctionReturn(0);
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
548 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
549 {
550   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
551   superlu_options_t options;
552   int               ierr;
553   char              *colperm;
554 
555   PetscFunctionBegin;
556   /* check if matrix is superlu_dist type */
557   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
558 
559   options = lu->options;
560   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
561   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
562   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
563   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
564   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
565   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
566   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
567   if (options.ColPerm == NATURAL) {
568     colperm = "NATURAL";
569   } else if (options.ColPerm == MMD_AT_PLUS_A) {
570     colperm = "MMD_AT_PLUS_A";
571   } else if (options.ColPerm == MMD_ATA) {
572     colperm = "MMD_ATA";
573   } else if (options.ColPerm == COLAMD) {
574     colperm = "COLAMD";
575   } else {
576     SETERRQ(1,"Unknown column permutation");
577   }
578   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatView_SuperLU_DIST"
584 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
585 {
586   int               ierr;
587   PetscTruth        isascii;
588   PetscViewerFormat format;
589   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
590 
591   PetscFunctionBegin;
592   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
593 
594   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
595   if (isascii) {
596     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
597     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
598       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 
605 EXTERN_C_BEGIN
606 #undef __FUNCT__
607 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
608 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) {
609   /* This routine is only called to convert to MATSUPERLU_DIST */
610   /* from MATSEQAIJ if A has a single process communicator */
611   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
612   int              ierr;
613   MPI_Comm         comm;
614   Mat              B=*newmat;
615   Mat_SuperLU_DIST *lu;
616 
617   PetscFunctionBegin;
618   if (B != A) {
619     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
620   }
621 
622   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
623   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
624 
625   lu->MatDuplicate         = A->ops->duplicate;
626   lu->MatView              = A->ops->view;
627   lu->MatAssemblyEnd       = A->ops->assemblyend;
628   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
629   lu->MatDestroy           = A->ops->destroy;
630   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
631 
632   B->spptr                 = (void*)lu;
633   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
634   B->ops->view             = MatView_SuperLU_DIST;
635   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
636   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
637   B->ops->destroy          = MatDestroy_SuperLU_DIST;
638   ierr = MPI_Comm_size(comm,&(lu->size));CHKERRQ(ierr);CHKERRQ(ierr);
639   if ((lu->size) == 1) {
640     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
641                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
642     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
643                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
644   } else {
645     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
646                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
647     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
648                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
649   }
650   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
651   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
652   *newmat = B;
653   PetscFunctionReturn(0);
654 }
655 EXTERN_C_END
656 
657 #undef __FUNCT__
658 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
659 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
660   int ierr;
661   PetscFunctionBegin;
662   ierr = (*A->ops->duplicate)(A,op,M);CHKERRQ(ierr);
663   ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr);
664   PetscFunctionReturn(0);
665 }
666 
667 /*MC
668   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
669   via the external package SuperLU_DIST.
670 
671   If SuperLU_DIST is installed (see the manual for
672   instructions on how to declare the existence of external packages),
673   a matrix type can be constructed which invokes SuperLU_DIST solvers.
674   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
675   This matrix type is only supported for double precision real.
676 
677   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
678   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
679   MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
680   for communicators controlling multiple processes.  It is recommended that you call both of
681   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
682   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
683   without data copy.
684 
685   Options Database Keys:
686 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
687 . -mat_superlu_dist_r <n> - number of rows in processor partition
688 . -mat_superlu_dist_c <n> - number of columns in processor partition
689 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
690 . -mat_superlu_dist_equil - equilibrate the matrix
691 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
692 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
693 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
694 . -mat_superlu_dist_iterrefine - use iterative refinement
695 - -mat_superlu_dist_statprint - print factorization information
696 
697    Level: beginner
698 
699 .seealso: PCLU
700 M*/
701 
702 EXTERN_C_BEGIN
703 #undef __FUNCT__
704 #define __FUNCT__ "MatCreate_SuperLU_DIST"
705 int MatCreate_SuperLU_DIST(Mat A) {
706   int ierr,size;
707 
708   PetscFunctionBegin;
709   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
710   /*   and SuperLU_DIST types */
711   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
712   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
713   if (size == 1) {
714     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
715   } else {
716     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
717   }
718   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
719   PetscFunctionReturn(0);
720 }
721 EXTERN_C_END
722 
723