xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision a2ec6df8280ae283e8b0f45d20ebe04f5d3dfc7b)
1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3         Provides an interface to the SuperLU_DIST_2.0 sparse solver
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14 #include "superlu_zdefs.h"
15 #else
16 #include "superlu_ddefs.h"
17 #endif
18 EXTERN_C_END
19 
20 typedef enum { GLOBAL,DISTRIBUTED
21 } SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   MatStructure            flg;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   int size;
42   /* A few function pointers for inheritance */
43   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   int (*MatView)(Mat,PetscViewer);
45   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   int (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) {
59   int              ierr;
60   Mat              B=*newmat;
61   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
62 
63   PetscFunctionBegin;
64   if (B != A) {
65     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
66   }
67   /* Reset the original function pointers */
68   B->ops->duplicate        = lu->MatDuplicate;
69   B->ops->view             = lu->MatView;
70   B->ops->assemblyend      = lu->MatAssemblyEnd;
71   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
72   B->ops->destroy          = lu->MatDestroy;
73 
74   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   *newmat = B;
78   PetscFunctionReturn(0);
79 }
80 EXTERN_C_END
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
84 int MatDestroy_SuperLU_DIST(Mat A)
85 {
86   int              ierr;
87   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
88 
89   PetscFunctionBegin;
90   if (lu->CleanUpSuperLU_Dist) {
91     /* Deallocate SuperLU_DIST storage */
92     if (lu->MatInputMode == GLOBAL) {
93       Destroy_CompCol_Matrix_dist(&lu->A_sup);
94     } else {
95       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
96       if ( lu->options.SolveInitialized ) {
97 #if defined(PETSC_USE_COMPLEX)
98         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
99 #else
100         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
101 #endif
102       }
103     }
104     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
105     ScalePermstructFree(&lu->ScalePermstruct);
106     LUstructFree(&lu->LUstruct);
107 
108     /* Release the SuperLU_DIST process grid. */
109     superlu_gridexit(&lu->grid);
110 
111     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
112   }
113 
114   if (lu->size == 1) {
115     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
116   } else {
117     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
118   }
119   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
120 
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSolve_SuperLU_DIST"
126 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
127 {
128   Mat_MPIAIJ       *aa = (Mat_MPIAIJ*)A->data;
129   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
130   int              ierr, size=aa->size;
131   int              m=A->M, N=A->N;
132   SuperLUStat_t    stat;
133   double           berr[1];
134   PetscScalar      *bptr;
135   int              info, nrhs=1;
136   Vec              x_seq;
137   IS               iden;
138   VecScatter       scat;
139   PetscLogDouble   time0,time,time_min,time_max;
140 
141   PetscFunctionBegin;
142   if (size > 1) {
143     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
144       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
145       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
146       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
147       ierr = ISDestroy(iden);CHKERRQ(ierr);
148 
149       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
150       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
151       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
152     } else { /* distributed mat input */
153       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
154       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
155     }
156   } else { /* size == 1 */
157     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
158     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
159   }
160 
161   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
162 
163   PStatInit(&stat);        /* Initialize the statistics variables. */
164   if (lu->StatPrint) {
165     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */
166     ierr = PetscGetTime(&time0);CHKERRQ(ierr);  /* to be removed */
167   }
168   if (lu->MatInputMode == GLOBAL) {
169 #if defined(PETSC_USE_COMPLEX)
170     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
171                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
172 #else
173     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
174                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
175 #endif
176   } else { /* distributed mat input */
177 #if defined(PETSC_USE_COMPLEX)
178     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
179 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
180     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
181 #else
182     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
183 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
184     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
185 #endif
186   }
187   if (lu->StatPrint) {
188     ierr = PetscGetTime(&time);CHKERRQ(ierr);  /* to be removed */
189      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
190   }
191   PStatFree(&stat);
192 
193   if (size > 1) {
194     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
195       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
196       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
197       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
198       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
199       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
200     } else {
201       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
202     }
203   } else {
204     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
205   }
206   if (lu->StatPrint) {
207     time0 = time - time0;
208     ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr);
209     ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr);
210     ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr);
211     time = time/size; /* average time */
212     ierr = PetscPrintf(A->comm, "  Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr);
213   }
214   PetscFunctionReturn(0);
215 }
216 
217 #undef __FUNCT__
218 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
219 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
220 {
221   Mat_MPIAIJ       *fac = (Mat_MPIAIJ*)(*F)->data,*mat;
222   Mat              *tseq,A_seq = PETSC_NULL;
223   Mat_SeqAIJ       *aa,*bb;
224   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225   int              M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
226                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
227   SuperLUStat_t    stat;
228   double           *berr=0;
229   IS               isrow;
230   PetscLogDouble   time0[2],time[2],time_min[2],time_max[2];
231 #if defined(PETSC_USE_COMPLEX)
232   doublecomplex    *av, *bv;
233 #else
234   double           *av, *bv;
235 #endif
236 
237   PetscFunctionBegin;
238   if (lu->StatPrint) {
239     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
240     ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr);
241   }
242 
243   if (lu->MatInputMode == GLOBAL) { /* global mat input */
244     if (size > 1) { /* convert mpi A to seq mat A */
245       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr);
246       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr);
247       ierr = ISDestroy(isrow);CHKERRQ(ierr);
248 
249       A_seq = *tseq;
250       ierr = PetscFree(tseq);CHKERRQ(ierr);
251       aa =  (Mat_SeqAIJ*)A_seq->data;
252     } else {
253       aa =  (Mat_SeqAIJ*)A->data;
254     }
255 
256     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
257     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
258 #if defined(PETSC_USE_COMPLEX)
259       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
260 #else
261       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
262 #endif
263     } else { /* successive numeric factorization, sparsity pattern is reused. */
264       Destroy_CompCol_Matrix_dist(&lu->A_sup);
265       Destroy_LU(N, &lu->grid, &lu->LUstruct);
266       lu->options.Fact = SamePattern;
267     }
268 #if defined(PETSC_USE_COMPLEX)
269     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
270 #else
271     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
272 #endif
273 
274     /* Create compressed column matrix A_sup. */
275 #if defined(PETSC_USE_COMPLEX)
276     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
277 #else
278     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
279 #endif
280   } else { /* distributed mat input */
281     mat =  (Mat_MPIAIJ*)A->data;
282     aa=(Mat_SeqAIJ*)(mat->A)->data;
283     bb=(Mat_SeqAIJ*)(mat->B)->data;
284     ai=aa->i; aj=aa->j;
285     bi=bb->i; bj=bb->j;
286 #if defined(PETSC_USE_COMPLEX)
287     av=(doublecomplex*)aa->a;
288     bv=(doublecomplex*)bb->a;
289 #else
290     av=aa->a;
291     bv=bb->a;
292 #endif
293     rstart = mat->rstart;
294     nz     = aa->nz + bb->nz;
295     garray = mat->garray;
296     rstart = mat->rstart;
297 
298     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
299 #if defined(PETSC_USE_COMPLEX)
300       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #else
302       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
303 #endif
304     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
305       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
306       Destroy_LU(N, &lu->grid, &lu->LUstruct);
307       lu->options.Fact = SamePattern;
308     }
309     nz = 0; jB = 0; irow = mat->rstart;
310     for ( i=0; i<m; i++ ) {
311       lu->row[i] = nz;
312       countA = ai[i+1] - ai[i];
313       countB = bi[i+1] - bi[i];
314       ajj = aj + ai[i];  /* ptr to the beginning of this row */
315       bjj = bj + bi[i];
316 
317       /* B part, smaller col index */
318       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
319       for (j=0; j<countB; j++){
320         jcol = garray[bjj[j]];
321         if (jcol > colA_start) {
322           jB = j;
323           break;
324         }
325         lu->col[nz] = jcol;
326         lu->val[nz++] = *bv++;
327         if (j==countB-1) jB = countB;
328       }
329 
330       /* A part */
331       for (j=0; j<countA; j++){
332         lu->col[nz] = mat->rstart + ajj[j];
333         lu->val[nz++] = *av++;
334       }
335 
336       /* B part, larger col index */
337       for (j=jB; j<countB; j++){
338         lu->col[nz] = garray[bjj[j]];
339         lu->val[nz++] = *bv++;
340       }
341     }
342     lu->row[m] = nz;
343 #if defined(PETSC_USE_COMPLEX)
344     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
345 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
346 #else
347     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
348 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
349 #endif
350   }
351   if (lu->StatPrint) {
352     ierr = PetscGetTime(&time[0]);CHKERRQ(ierr);
353     time0[0] = time[0] - time0[0];
354   }
355 
356   /* Factor the matrix. */
357   PStatInit(&stat);   /* Initialize the statistics variables. */
358 
359   if (lu->StatPrint) {
360     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
361     ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr);
362   }
363 
364   if (lu->MatInputMode == GLOBAL) { /* global mat input */
365 #if defined(PETSC_USE_COMPLEX)
366     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
367                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
368 #else
369     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
370                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
371 #endif
372   } else { /* distributed mat input */
373 #if defined(PETSC_USE_COMPLEX)
374     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
375 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
376     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
377 #else
378     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
379 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
380     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
381 #endif
382   }
383   if (lu->StatPrint) {
384     ierr = PetscGetTime(&time[1]);CHKERRQ(ierr);  /* to be removed */
385     time0[1] = time[1] - time0[1];
386     if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
387   }
388   PStatFree(&stat);
389 
390   if (lu->MatInputMode == GLOBAL && size > 1){
391     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
392   }
393 
394   if (lu->StatPrint) {
395     ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
396     ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
397     ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
398     for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
399     ierr = PetscPrintf(A->comm, "  Time for mat conversion (max/min/avg):    %g / %g / %g\n",time_max[0],time_min[0],time[0]);
400     ierr = PetscPrintf(A->comm, "  Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]);
401   }
402   (*F)->assembled = PETSC_TRUE;
403   lu->flg         = SAME_NONZERO_PATTERN;
404   PetscFunctionReturn(0);
405 }
406 
407 /* Note the Petsc r and c permutations are ignored */
408 #undef __FUNCT__
409 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
410 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
411 {
412   Mat               B;
413   Mat_SuperLU_DIST  *lu;
414   int               ierr,M=A->M,N=A->N,size;
415   superlu_options_t options;
416   char              buff[32];
417   PetscTruth        flg;
418   char              *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
419   char              *prtype[] = {"LargeDiag","NATURAL"};
420 
421   PetscFunctionBegin;
422   /* Create the factorization matrix */
423   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
424   ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr);
425   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
426   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
427 
428   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
429   B->ops->solve            = MatSolve_SuperLU_DIST;
430   B->factor                = FACTOR_LU;
431 
432   lu = (Mat_SuperLU_DIST*)(B->spptr);
433 
434   /* Set the input options */
435   set_default_options(&options);
436   lu->MatInputMode = GLOBAL;
437   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
438 
439   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
440   lu->nprow = size/2;               /* Default process rows.      */
441   if (lu->nprow == 0) lu->nprow = 1;
442   lu->npcol = size/lu->nprow;           /* Default process columns.   */
443 
444   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
445 
446     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
447     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
448     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
449 
450     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
451     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
452 
453     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
454     if (!flg) {
455       options.Equil = NO;
456     }
457 
458     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr);
459     while (flg) {
460       ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr);
461       if (flg) {
462         options.RowPerm = LargeDiag;
463         break;
464       }
465       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
466       if (flg) {
467         options.RowPerm = NOROWPERM;
468         break;
469       }
470       SETERRQ1(1,"Unknown row permutation %s",buff);
471     }
472 
473     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr);
474     while (flg) {
475       ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr);
476       if (flg) {
477         options.ColPerm = MMD_AT_PLUS_A;
478         break;
479       }
480       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
481       if (flg) {
482         options.ColPerm = NATURAL;
483         break;
484       }
485       ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr);
486       if (flg) {
487         options.ColPerm = MMD_ATA;
488         break;
489       }
490       ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr);
491       if (flg) {
492         options.ColPerm = COLAMD;
493         break;
494       }
495       SETERRQ1(1,"Unknown column permutation %s",buff);
496     }
497 
498     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
499     if (!flg) {
500       options.ReplaceTinyPivot = NO;
501     }
502 
503     options.IterRefine = NOREFINE;
504     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
505     if (flg) {
506       options.IterRefine = DOUBLE;
507     }
508 
509     if (PetscLogPrintInfo) {
510       lu->StatPrint = (int)PETSC_TRUE;
511     } else {
512       lu->StatPrint = (int)PETSC_FALSE;
513     }
514     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
515                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
516   PetscOptionsEnd();
517 
518   /* Initialize the SuperLU process grid. */
519   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
520 
521   /* Initialize ScalePermstruct and LUstruct. */
522   ScalePermstructInit(M, N, &lu->ScalePermstruct);
523   LUstructInit(M, N, &lu->LUstruct);
524 
525   lu->options            = options;
526   lu->flg                = DIFFERENT_NONZERO_PATTERN;
527   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
528   *F = B;
529   PetscFunctionReturn(0);
530 }
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
534 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
535   int              ierr;
536   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
537 
538   PetscFunctionBegin;
539   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
540   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
541   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
542   PetscFunctionReturn(0);
543 }
544 
545 #undef __FUNCT__
546 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
547 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
548 {
549   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
550   superlu_options_t options;
551   int               ierr;
552   char              *colperm;
553 
554   PetscFunctionBegin;
555   /* check if matrix is superlu_dist type */
556   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
557 
558   options = lu->options;
559   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
560   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
561   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
562   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
563   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
564   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
565   if (options.ColPerm == NATURAL) {
566     colperm = "NATURAL";
567   } else if (options.ColPerm == MMD_AT_PLUS_A) {
568     colperm = "MMD_AT_PLUS_A";
569   } else if (options.ColPerm == MMD_ATA) {
570     colperm = "MMD_ATA";
571   } else if (options.ColPerm == COLAMD) {
572     colperm = "COLAMD";
573   } else {
574     SETERRQ(1,"Unknown column permutation");
575   }
576   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatView_SuperLU_DIST"
582 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
583 {
584   int               ierr;
585   PetscTruth        isascii;
586   PetscViewerFormat format;
587   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
588 
589   PetscFunctionBegin;
590   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
591 
592   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
593   if (isascii) {
594     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
595     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
596       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
597     }
598   }
599   PetscFunctionReturn(0);
600 }
601 
602 
603 EXTERN_C_BEGIN
604 #undef __FUNCT__
605 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
606 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) {
607   /* This routine is only called to convert to MATSUPERLU_DIST */
608   /* from MATSEQAIJ if A has a single process communicator */
609   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
610   int              ierr;
611   MPI_Comm         comm;
612   Mat              B=*newmat;
613   Mat_SuperLU_DIST *lu;
614 
615   PetscFunctionBegin;
616   if (B != A) {
617     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
618   }
619 
620   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
621   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
622 
623   lu->MatDuplicate         = A->ops->duplicate;
624   lu->MatView              = A->ops->view;
625   lu->MatAssemblyEnd       = A->ops->assemblyend;
626   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
627   lu->MatDestroy           = A->ops->destroy;
628   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
629 
630   B->spptr                 = (void*)lu;
631   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
632   B->ops->view             = MatView_SuperLU_DIST;
633   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
634   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
635   B->ops->destroy          = MatDestroy_SuperLU_DIST;
636   ierr = MPI_Comm_size(comm,&(lu->size));CHKERRQ(ierr);CHKERRQ(ierr);
637   if ((lu->size) == 1) {
638     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
639                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
640     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
641                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
642   } else {
643     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
644                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
645     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
646                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
647   }
648   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
649   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
650   *newmat = B;
651   PetscFunctionReturn(0);
652 }
653 EXTERN_C_END
654 
655 #undef __FUNCT__
656 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
657 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
658   int ierr;
659   PetscFunctionBegin;
660   ierr = (*A->ops->duplicate)(A,op,M);CHKERRQ(ierr);
661   ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr);
662   PetscFunctionReturn(0);
663 }
664 
665 /*MC
666   MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
667   via the external package SuperLU_DIST.
668 
669   If SuperLU_DIST is installed (see the manual for
670   instructions on how to declare the existence of external packages),
671   a matrix type can be constructed which invokes SuperLU_DIST solvers.
672   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
673   This matrix type is only supported for double precision real.
674 
675   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
676   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
677   MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
678   for communicators controlling multiple processes.  It is recommended that you call both of
679   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
680   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
681   without data copy.
682 
683   Options Database Keys:
684 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
685 . -mat_superlu_dist_r <n> - number of rows in processor partition
686 . -mat_superlu_dist_c <n> - number of columns in processor partition
687 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
688 . -mat_superlu_dist_equil - equilibrate the matrix
689 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
690 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
691 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
692 . -mat_superlu_dist_iterrefine - use iterative refinement
693 - -mat_superlu_dist_statprint - print factorization information
694 
695    Level: beginner
696 
697 .seealso: PCLU
698 M*/
699 
700 EXTERN_C_BEGIN
701 #undef __FUNCT__
702 #define __FUNCT__ "MatCreate_SuperLU_DIST"
703 int MatCreate_SuperLU_DIST(Mat A) {
704   int ierr,size;
705 
706   PetscFunctionBegin;
707   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
708   /*   and SuperLU_DIST types */
709   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
710   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
711   if (size == 1) {
712     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
713   } else {
714     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
715   }
716   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
717   PetscFunctionReturn(0);
718 }
719 EXTERN_C_END
720 
721