xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 28b08bd3b5f59ffcd1196bf53847454d207c6e8e)
1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3         Provides an interface to the SuperLU_DIST_2.0 sparse solver
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14 #include "superlu_zdefs.h"
15 #else
16 #include "superlu_ddefs.h"
17 #endif
18 EXTERN_C_END
19 
20 typedef enum { GLOBAL,DISTRIBUTED
21 } SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   MatStructure            flg;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   MatType basetype;
42   /* A few function pointers for inheritance */
43   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   int (*MatView)(Mat,PetscViewer);
45   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   int (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) {
59   /* This routine is only called to convert an unfactored PETSc-SuperLU_DIST matrix */
60   /* to its base PETSc type, so we will ignore 'MatType type'. */
61   int              ierr;
62   Mat              B=*newmat;
63   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
64 
65   PetscFunctionBegin;
66   if (B != A) {
67     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
68   }
69   /* Reset the original function pointers */
70   B->ops->duplicate        = lu->MatDuplicate;
71   B->ops->view             = lu->MatView;
72   B->ops->assemblyend      = lu->MatAssemblyEnd;
73   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
74   B->ops->destroy          = lu->MatDestroy;
75 
76   ierr = PetscObjectChangeTypeName((PetscObject)B,lu->basetype);CHKERRQ(ierr);
77   ierr = PetscFree(lu);CHKERRQ(ierr);
78 
79   *newmat = B;
80   PetscFunctionReturn(0);
81 }
82 EXTERN_C_END
83 
84 #undef __FUNCT__
85 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
86 int MatDestroy_SuperLU_DIST(Mat A)
87 {
88   int              ierr;
89   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
90 
91   PetscFunctionBegin;
92   if (lu->CleanUpSuperLU_Dist) {
93     /* Deallocate SuperLU_DIST storage */
94     if (lu->MatInputMode == GLOBAL) {
95       Destroy_CompCol_Matrix_dist(&lu->A_sup);
96     } else {
97       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
98       if ( lu->options.SolveInitialized ) {
99 #if defined(PETSC_USE_COMPLEX)
100         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
101 #else
102         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
103 #endif
104       }
105     }
106     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
107     ScalePermstructFree(&lu->ScalePermstruct);
108     LUstructFree(&lu->LUstruct);
109 
110     /* Release the SuperLU_DIST process grid. */
111     superlu_gridexit(&lu->grid);
112 
113     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
114   }
115 
116   ierr = MatConvert_SuperLU_DIST_Base(A,lu->basetype,&A);CHKERRQ(ierr);
117   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
118 
119   PetscFunctionReturn(0);
120 }
121 
122 #undef __FUNCT__
123 #define __FUNCT__ "MatSolve_SuperLU_DIST"
124 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
125 {
126   Mat_MPIAIJ       *aa = (Mat_MPIAIJ*)A->data;
127   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
128   int              ierr, size=aa->size;
129   int              m=A->M, N=A->N;
130   SuperLUStat_t    stat;
131   double           berr[1];
132   PetscScalar      *bptr;
133   int              info, nrhs=1;
134   Vec              x_seq;
135   IS               iden;
136   VecScatter       scat;
137   PetscLogDouble   time0,time,time_min,time_max;
138 
139   PetscFunctionBegin;
140   if (size > 1) {
141     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
142       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
143       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
144       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
145       ierr = ISDestroy(iden);CHKERRQ(ierr);
146 
147       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
148       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
149       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
150     } else { /* distributed mat input */
151       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
152       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
153     }
154   } else { /* size == 1 */
155     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
156     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
157   }
158 
159   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
160 
161   PStatInit(&stat);        /* Initialize the statistics variables. */
162   if (lu->StatPrint) {
163     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */
164     ierr = PetscGetTime(&time0);CHKERRQ(ierr);  /* to be removed */
165   }
166   if (lu->MatInputMode == GLOBAL) {
167 #if defined(PETSC_USE_COMPLEX)
168     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
169                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
170 #else
171     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
172                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
173 #endif
174   } else { /* distributed mat input */
175 #if defined(PETSC_USE_COMPLEX)
176     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
177 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
178     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
179 #else
180     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
181 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
182     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
183 #endif
184   }
185   if (lu->StatPrint) {
186     ierr = PetscGetTime(&time);CHKERRQ(ierr);  /* to be removed */
187      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
188   }
189   PStatFree(&stat);
190 
191   if (size > 1) {
192     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
193       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
194       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
195       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
196       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
197       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
198     } else {
199       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
200     }
201   } else {
202     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
203   }
204   if (lu->StatPrint) {
205     time0 = time - time0;
206     ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr);
207     ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr);
208     ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr);
209     time = time/size; /* average time */
210     ierr = PetscPrintf(A->comm, "  Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr);
211   }
212   PetscFunctionReturn(0);
213 }
214 
215 #undef __FUNCT__
216 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
217 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
218 {
219   Mat_MPIAIJ       *fac = (Mat_MPIAIJ*)(*F)->data,*mat;
220   Mat              *tseq,A_seq = PETSC_NULL;
221   Mat_SeqAIJ       *aa,*bb;
222   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
223   int              M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
224                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
225   SuperLUStat_t    stat;
226   double           *berr=0;
227   IS               isrow;
228   PetscLogDouble   time0[2],time[2],time_min[2],time_max[2];
229 #if defined(PETSC_USE_COMPLEX)
230   doublecomplex    *av, *bv;
231 #else
232   double           *av, *bv;
233 #endif
234 
235   PetscFunctionBegin;
236   if (lu->StatPrint) {
237     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
238     ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr);
239   }
240 
241   if (lu->MatInputMode == GLOBAL) { /* global mat input */
242     if (size > 1) { /* convert mpi A to seq mat A */
243       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr);
244       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr);
245       ierr = ISDestroy(isrow);CHKERRQ(ierr);
246 
247       A_seq = *tseq;
248       ierr = PetscFree(tseq);CHKERRQ(ierr);
249       aa =  (Mat_SeqAIJ*)A_seq->data;
250     } else {
251       aa =  (Mat_SeqAIJ*)A->data;
252     }
253 
254     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
255     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
256 #if defined(PETSC_USE_COMPLEX)
257       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
258 #else
259       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
260 #endif
261     } else { /* successive numeric factorization, sparsity pattern is reused. */
262       Destroy_CompCol_Matrix_dist(&lu->A_sup);
263       Destroy_LU(N, &lu->grid, &lu->LUstruct);
264       lu->options.Fact = SamePattern;
265     }
266 #if defined(PETSC_USE_COMPLEX)
267     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
268 #else
269     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
270 #endif
271 
272     /* Create compressed column matrix A_sup. */
273 #if defined(PETSC_USE_COMPLEX)
274     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
275 #else
276     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
277 #endif
278   } else { /* distributed mat input */
279     mat =  (Mat_MPIAIJ*)A->data;
280     aa=(Mat_SeqAIJ*)(mat->A)->data;
281     bb=(Mat_SeqAIJ*)(mat->B)->data;
282     ai=aa->i; aj=aa->j;
283     bi=bb->i; bj=bb->j;
284 #if defined(PETSC_USE_COMPLEX)
285     av=(doublecomplex*)aa->a;
286     bv=(doublecomplex*)bb->a;
287 #else
288     av=aa->a;
289     bv=bb->a;
290 #endif
291     rstart = mat->rstart;
292     nz     = aa->nz + bb->nz;
293     garray = mat->garray;
294     rstart = mat->rstart;
295 
296     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
297 #if defined(PETSC_USE_COMPLEX)
298       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
299 #else
300       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #endif
302     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
303       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
304       Destroy_LU(N, &lu->grid, &lu->LUstruct);
305       lu->options.Fact = SamePattern;
306     }
307     nz = 0; jB = 0; irow = mat->rstart;
308     for ( i=0; i<m; i++ ) {
309       lu->row[i] = nz;
310       countA = ai[i+1] - ai[i];
311       countB = bi[i+1] - bi[i];
312       ajj = aj + ai[i];  /* ptr to the beginning of this row */
313       bjj = bj + bi[i];
314 
315       /* B part, smaller col index */
316       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
317       for (j=0; j<countB; j++){
318         jcol = garray[bjj[j]];
319         if (jcol > colA_start) {
320           jB = j;
321           break;
322         }
323         lu->col[nz] = jcol;
324         lu->val[nz++] = *bv++;
325         if (j==countB-1) jB = countB;
326       }
327 
328       /* A part */
329       for (j=0; j<countA; j++){
330         lu->col[nz] = mat->rstart + ajj[j];
331         lu->val[nz++] = *av++;
332       }
333 
334       /* B part, larger col index */
335       for (j=jB; j<countB; j++){
336         lu->col[nz] = garray[bjj[j]];
337         lu->val[nz++] = *bv++;
338       }
339     }
340     lu->row[m] = nz;
341 #if defined(PETSC_USE_COMPLEX)
342     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
343 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
344 #else
345     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
346 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
347 #endif
348   }
349   if (lu->StatPrint) {
350     ierr = PetscGetTime(&time[0]);CHKERRQ(ierr);
351     time0[0] = time[0] - time0[0];
352   }
353 
354   /* Factor the matrix. */
355   PStatInit(&stat);   /* Initialize the statistics variables. */
356 
357   if (lu->StatPrint) {
358     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
359     ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr);
360   }
361 
362   if (lu->MatInputMode == GLOBAL) { /* global mat input */
363 #if defined(PETSC_USE_COMPLEX)
364     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
365                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
366 #else
367     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
368                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
369 #endif
370   } else { /* distributed mat input */
371 #if defined(PETSC_USE_COMPLEX)
372     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
373 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
374     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
375 #else
376     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
377 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
378     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
379 #endif
380   }
381   if (lu->StatPrint) {
382     ierr = PetscGetTime(&time[1]);CHKERRQ(ierr);  /* to be removed */
383     time0[1] = time[1] - time0[1];
384     if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
385   }
386   PStatFree(&stat);
387 
388   if (lu->MatInputMode == GLOBAL && size > 1){
389     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
390   }
391 
392   if (lu->StatPrint) {
393     ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
394     ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
395     ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
396     for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
397     ierr = PetscPrintf(A->comm, "  Time for mat conversion (max/min/avg):    %g / %g / %g\n",time_max[0],time_min[0],time[0]);
398     ierr = PetscPrintf(A->comm, "  Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]);
399   }
400   (*F)->assembled = PETSC_TRUE;
401   lu->flg         = SAME_NONZERO_PATTERN;
402   PetscFunctionReturn(0);
403 }
404 
405 /* Note the Petsc r and c permutations are ignored */
406 #undef __FUNCT__
407 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
408 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
409 {
410   Mat               B;
411   Mat_SuperLU_DIST  *lu;
412   int               ierr,M=A->M,N=A->N,size;
413   superlu_options_t options;
414   char              buff[32];
415   PetscTruth        flg;
416   char              *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
417   char              *prtype[] = {"LargeDiag","NATURAL"};
418 
419   PetscFunctionBegin;
420   /* Create the factorization matrix */
421   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
422   ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr);
423   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
424   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
425 
426   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
427   B->ops->solve            = MatSolve_SuperLU_DIST;
428   B->factor                = FACTOR_LU;
429 
430   lu = (Mat_SuperLU_DIST*)(B->spptr);
431 
432   /* Set the input options */
433   set_default_options(&options);
434   lu->MatInputMode = GLOBAL;
435   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
436 
437   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
438   lu->nprow = size/2;               /* Default process rows.      */
439   if (lu->nprow == 0) lu->nprow = 1;
440   lu->npcol = size/lu->nprow;           /* Default process columns.   */
441 
442   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
443 
444     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
445     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
446     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
447 
448     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
449     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
450 
451     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
452     if (!flg) {
453       options.Equil = NO;
454     }
455 
456     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr);
457     while (flg) {
458       ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr);
459       if (flg) {
460         options.RowPerm = LargeDiag;
461         break;
462       }
463       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
464       if (flg) {
465         options.RowPerm = NOROWPERM;
466         break;
467       }
468       SETERRQ1(1,"Unknown row permutation %s",buff);
469     }
470 
471     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr);
472     while (flg) {
473       ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr);
474       if (flg) {
475         options.ColPerm = MMD_AT_PLUS_A;
476         break;
477       }
478       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
479       if (flg) {
480         options.ColPerm = NATURAL;
481         break;
482       }
483       ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr);
484       if (flg) {
485         options.ColPerm = MMD_ATA;
486         break;
487       }
488       ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr);
489       if (flg) {
490         options.ColPerm = COLAMD;
491         break;
492       }
493       SETERRQ1(1,"Unknown column permutation %s",buff);
494     }
495 
496     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
497     if (!flg) {
498       options.ReplaceTinyPivot = NO;
499     }
500 
501     options.IterRefine = NOREFINE;
502     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
503     if (flg) {
504       options.IterRefine = DOUBLE;
505     }
506 
507     if (PetscLogPrintInfo) {
508       lu->StatPrint = (int)PETSC_TRUE;
509     } else {
510       lu->StatPrint = (int)PETSC_FALSE;
511     }
512     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
513                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
514   PetscOptionsEnd();
515 
516   /* Initialize the SuperLU process grid. */
517   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
518 
519   /* Initialize ScalePermstruct and LUstruct. */
520   ScalePermstructInit(M, N, &lu->ScalePermstruct);
521   LUstructInit(M, N, &lu->LUstruct);
522 
523   lu->options            = options;
524   lu->flg                = DIFFERENT_NONZERO_PATTERN;
525   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
526   *F = B;
527   PetscFunctionReturn(0);
528 }
529 
530 #undef __FUNCT__
531 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
532 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
533   int              ierr;
534   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
535 
536   PetscFunctionBegin;
537   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
538   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
539   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
540   PetscFunctionReturn(0);
541 }
542 
543 #undef __FUNCT__
544 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
545 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
546 {
547   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
548   superlu_options_t options;
549   int               ierr;
550   char              *colperm;
551 
552   PetscFunctionBegin;
553   /* check if matrix is superlu_dist type */
554   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
555 
556   options = lu->options;
557   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
558   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
559   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
560   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
561   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
562   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
563   if (options.ColPerm == NATURAL) {
564     colperm = "NATURAL";
565   } else if (options.ColPerm == MMD_AT_PLUS_A) {
566     colperm = "MMD_AT_PLUS_A";
567   } else if (options.ColPerm == MMD_ATA) {
568     colperm = "MMD_ATA";
569   } else if (options.ColPerm == COLAMD) {
570     colperm = "COLAMD";
571   } else {
572     SETERRQ(1,"Unknown column permutation");
573   }
574   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatView_SuperLU_DIST"
580 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
581 {
582   int               ierr;
583   PetscTruth        isascii;
584   PetscViewerFormat format;
585   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
586 
587   PetscFunctionBegin;
588   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
589 
590   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
591   if (isascii) {
592     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
593     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
594       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
595     }
596   }
597   PetscFunctionReturn(0);
598 }
599 
600 
601 EXTERN_C_BEGIN
602 #undef __FUNCT__
603 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
604 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) {
605   /* This routine is only called to convert to MATSUPERLU_DIST */
606   /* from MATSEQAIJ if A has a single process communicator */
607   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
608   int              ierr,size;
609   MPI_Comm         comm;
610   Mat              B=*newmat;
611   Mat_SuperLU_DIST *lu;
612 
613   PetscFunctionBegin;
614   if (B != A) {
615     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
616   }
617 
618   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
619   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
620 
621   lu->MatDuplicate         = A->ops->duplicate;
622   lu->MatView              = A->ops->view;
623   lu->MatAssemblyEnd       = A->ops->assemblyend;
624   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
625   lu->MatDestroy           = A->ops->destroy;
626   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
627 
628   B->spptr                 = (void*)lu;
629   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
630   B->ops->view             = MatView_SuperLU_DIST;
631   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
632   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
633   B->ops->destroy          = MatDestroy_SuperLU_DIST;
634   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
635   if (size == 1) {
636     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
637                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
638     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
639                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
640   } else {
641     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
642                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
643     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
644                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
645   }
646   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
647   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
648   *newmat = B;
649   PetscFunctionReturn(0);
650 }
651 EXTERN_C_END
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
655 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
656   int ierr;
657   PetscFunctionBegin;
658   ierr = (*A->ops->duplicate)(A,op,M);CHKERRQ(ierr);
659   ierr = MatConvert_Base_SuperLU_DIST(*M,MATSUPERLU_DIST,M);CHKERRQ(ierr);
660   PetscFunctionReturn(0);
661 }
662 
663 /*MC
664   MATSUPERLU_DIST - a matrix type providing direct solvers (LU) for parallel matrices
665   via the external package SuperLU_DIST.
666 
667   If SuperLU_DIST is installed (see the manual for
668   instructions on how to declare the existence of external packages),
669   a matrix type can be constructed which invokes SuperLU_DIST solvers.
670   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
671   This matrix type is only supported for double precision real.
672 
673   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
674   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
675   MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
676   for communicators controlling multiple processes.  It is recommended that you call both of
677   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
678   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
679   without data copy.
680 
681   Options Database Keys:
682 + -mat_type superlu_dist
683 . -mat_superlu_dist_r <n> - number of rows in processor partition
684 . -mat_superlu_dist_c <n> - number of columns in processor partition
685 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
686 . -mat_superlu_dist_equil - equilibrate the matrix
687 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
688 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
689 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
690 . -mat_superlu_dist_iterrefine - use iterative refinement
691 - -mat_superlu_dist_statprint - print factorization information
692 
693    Level: beginner
694 
695 .seealso: PCLU
696 M*/
697 
698 EXTERN_C_BEGIN
699 #undef __FUNCT__
700 #define __FUNCT__ "MatCreate_SuperLU_DIST"
701 int MatCreate_SuperLU_DIST(Mat A) {
702   int ierr,size;
703 
704   PetscFunctionBegin;
705   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
706   /*   and SuperLU_DIST types */
707   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
708   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
709   if (size == 1) {
710     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
711   } else {
712     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
713   }
714   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 EXTERN_C_END
718 
719 EXTERN_C_BEGIN
720 #undef __FUNCT__
721 #define __FUNCT__ "MatLoad_SuperLU_DIST"
722 int MatLoad_SuperLU_DIST(PetscViewer viewer,MatType type,Mat *A) {
723   int      ierr,size,(*r)(PetscViewer,MatType,Mat*);
724   MPI_Comm comm;
725 
726   PetscFunctionBegin;
727   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
728   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
729   if (size == 1) {
730     ierr = PetscFListFind(comm,MatLoadList,MATSEQAIJ,(void(**)(void))&r);CHKERRQ(ierr);
731   } else {
732     ierr = PetscFListFind(comm,MatLoadList,MATMPIAIJ,(void(**)(void))&r);CHKERRQ(ierr);
733   }
734   ierr = (*r)(viewer,type,A);CHKERRQ(ierr);
735   PetscFunctionReturn(0);
736 }
737 EXTERN_C_END
738