xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 618c6428babd1cdfc19b0f3e34ac03276908e50a)
1 /*$Id: superlu_DIST.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
2 /*
3         Provides an interface to the SuperLU_DIST_2.0 sparse solver
4 */
5 
6 #include "src/mat/impls/aij/seq/aij.h"
7 #include "src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 EXTERN_C_BEGIN
13 #if defined(PETSC_USE_COMPLEX)
14 #include "superlu_zdefs.h"
15 #else
16 #include "superlu_ddefs.h"
17 #endif
18 EXTERN_C_END
19 
20 typedef enum { GLOBAL,DISTRIBUTED
21 } SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   MatStructure            flg;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   MatType basetype;
42   /* A few function pointers for inheritance */
43   int (*MatView)(Mat,PetscViewer);
44   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
45   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
46   int (*MatDestroy)(Mat);
47 
48   /* Flag to clean up (non-global) SuperLU objects during Destroy */
49   PetscTruth CleanUpSuperLU_Dist;
50 } Mat_MPIAIJ_SuperLU_DIST;
51 
52 EXTERN_C_BEGIN
53 #undef __FUNCT__
54 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
55 int MatConvert_SuperLU_DIST_Base(Mat A,MatType type,Mat *newmat) {
56   /* This routine is only called to convert an unfactored PETSc-SuperLU_DIST matrix */
57   /* to its base PETSc type, so we will ignore 'MatType type'. */
58   int                     ierr;
59   Mat                     B=*newmat;
60   Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST *)A->spptr;
61 
62   PetscFunctionBegin;
63   if (B != A) {
64     /* This routine was inherited from SeqAIJ. */
65     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
66   } else {
67     /* Reset the original function pointers */
68     B->ops->view             = lu->MatView;
69     B->ops->assemblyend      = lu->MatAssemblyEnd;
70     B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
71     B->ops->destroy          = lu->MatDestroy;
72 
73     ierr = PetscObjectChangeTypeName((PetscObject)B,lu->basetype);CHKERRQ(ierr);
74     ierr = PetscFree(lu);CHKERRQ(ierr);
75   }
76   *newmat = B;
77   PetscFunctionReturn(0);
78 }
79 EXTERN_C_END
80 
81 #undef __FUNCT__
82 #define __FUNCT__ "MatDestroy_MPIAIJ_SuperLU_DIST"
83 int MatDestroy_MPIAIJ_SuperLU_DIST(Mat A)
84 {
85   int                     ierr;
86   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
87 
88   PetscFunctionBegin;
89   if (lu->CleanUpSuperLU_Dist) {
90     /* Deallocate SuperLU_DIST storage */
91     if (lu->MatInputMode == GLOBAL) {
92       Destroy_CompCol_Matrix_dist(&lu->A_sup);
93     } else {
94       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
95       if ( lu->options.SolveInitialized ) {
96 #if defined(PETSC_USE_COMPLEX)
97         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
98 #else
99         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
100 #endif
101       }
102     }
103     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
104     ScalePermstructFree(&lu->ScalePermstruct);
105     LUstructFree(&lu->LUstruct);
106 
107     /* Release the SuperLU_DIST process grid. */
108     superlu_gridexit(&lu->grid);
109 
110     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
111   }
112 
113   ierr = MatConvert_SuperLU_DIST_Base(A,lu->basetype,&A);CHKERRQ(ierr);
114   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
115 
116   PetscFunctionReturn(0);
117 }
118 
119 #undef __FUNCT__
120 #define __FUNCT__ "MatSolve_MPIAIJ_SuperLU_DIST"
121 int MatSolve_MPIAIJ_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
122 {
123   Mat_MPIAIJ              *aa = (Mat_MPIAIJ*)A->data;
124   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
125   int                     ierr, size=aa->size;
126   int                     m=A->M, N=A->N;
127   SuperLUStat_t           stat;
128   double                  berr[1];
129   PetscScalar             *bptr;
130   int                     info, nrhs=1;
131   Vec                     x_seq;
132   IS                      iden;
133   VecScatter              scat;
134   PetscLogDouble          time0,time,time_min,time_max;
135 
136   PetscFunctionBegin;
137   if (size > 1) {
138     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
139       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
140       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
141       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
142       ierr = ISDestroy(iden);CHKERRQ(ierr);
143 
144       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
145       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
146       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
147     } else { /* distributed mat input */
148       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
149       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
150     }
151   } else { /* size == 1 */
152     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
153     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
154   }
155 
156   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
157 
158   PStatInit(&stat);        /* Initialize the statistics variables. */
159   if (lu->StatPrint) {
160     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr); /* to be removed */
161     ierr = PetscGetTime(&time0);CHKERRQ(ierr);  /* to be removed */
162   }
163   if (lu->MatInputMode == GLOBAL) {
164 #if defined(PETSC_USE_COMPLEX)
165     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
166                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
167 #else
168     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
169                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
170 #endif
171   } else { /* distributed mat input */
172 #if defined(PETSC_USE_COMPLEX)
173     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
174 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
175     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
176 #else
177     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
178 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
179     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
180 #endif
181   }
182   if (lu->StatPrint) {
183     ierr = PetscGetTime(&time);CHKERRQ(ierr);  /* to be removed */
184      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
185   }
186   PStatFree(&stat);
187 
188   if (size > 1) {
189     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
190       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
191       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
192       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
193       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
194       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
195     } else {
196       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
197     }
198   } else {
199     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
200   }
201   if (lu->StatPrint) {
202     time0 = time - time0;
203     ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);CHKERRQ(ierr);
204     ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);CHKERRQ(ierr);
205     ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);CHKERRQ(ierr);
206     time = time/size; /* average time */
207     ierr = PetscPrintf(A->comm, "  Time for superlu_dist solve (max/min/avg): %g / %g / %g\n\n",time_max,time_min,time);CHKERRQ(ierr);
208   }
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatLUFactorNumeric_MPIAIJ_SuperLU_DIST"
214 int MatLUFactorNumeric_MPIAIJ_SuperLU_DIST(Mat A,Mat *F)
215 {
216   Mat_MPIAIJ              *fac = (Mat_MPIAIJ*)(*F)->data,*mat;
217   Mat                     *tseq,A_seq = PETSC_NULL;
218   Mat_SeqAIJ              *aa,*bb;
219   Mat_MPIAIJ_SuperLU_DIST *lu = (Mat_MPIAIJ_SuperLU_DIST*)(*F)->spptr;
220   int                     M=A->M,N=A->N,info,ierr,size=fac->size,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
221                           m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
222   SuperLUStat_t           stat;
223   double                  *berr=0;
224   IS                      isrow;
225   PetscLogDouble          time0[2],time[2],time_min[2],time_max[2];
226 #if defined(PETSC_USE_COMPLEX)
227   doublecomplex           *av, *bv;
228 #else
229   double                  *av, *bv;
230 #endif
231 
232   PetscFunctionBegin;
233   if (lu->StatPrint) {
234     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
235     ierr = PetscGetTime(&time0[0]);CHKERRQ(ierr);
236   }
237 
238   if (lu->MatInputMode == GLOBAL) { /* global mat input */
239     if (size > 1) { /* convert mpi A to seq mat A */
240       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow); CHKERRQ(ierr);
241       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq); CHKERRQ(ierr);
242       ierr = ISDestroy(isrow);CHKERRQ(ierr);
243 
244       A_seq = *tseq;
245       ierr = PetscFree(tseq);CHKERRQ(ierr);
246       aa =  (Mat_SeqAIJ*)A_seq->data;
247     } else {
248       aa =  (Mat_SeqAIJ*)A->data;
249     }
250 
251     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
252     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
253 #if defined(PETSC_USE_COMPLEX)
254       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
255 #else
256       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
257 #endif
258     } else { /* successive numeric factorization, sparsity pattern is reused. */
259       Destroy_CompCol_Matrix_dist(&lu->A_sup);
260       Destroy_LU(N, &lu->grid, &lu->LUstruct);
261       lu->options.Fact = SamePattern;
262     }
263 #if defined(PETSC_USE_COMPLEX)
264     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
265 #else
266     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
267 #endif
268 
269     /* Create compressed column matrix A_sup. */
270 #if defined(PETSC_USE_COMPLEX)
271     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
272 #else
273     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
274 #endif
275   } else { /* distributed mat input */
276     mat =  (Mat_MPIAIJ*)A->data;
277     aa=(Mat_SeqAIJ*)(mat->A)->data;
278     bb=(Mat_SeqAIJ*)(mat->B)->data;
279     ai=aa->i; aj=aa->j;
280     bi=bb->i; bj=bb->j;
281 #if defined(PETSC_USE_COMPLEX)
282     av=(doublecomplex*)aa->a;
283     bv=(doublecomplex*)bb->a;
284 #else
285     av=aa->a;
286     bv=bb->a;
287 #endif
288     rstart = mat->rstart;
289     nz     = aa->nz + bb->nz;
290     garray = mat->garray;
291     rstart = mat->rstart;
292 
293     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
294 #if defined(PETSC_USE_COMPLEX)
295       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
296 #else
297       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
298 #endif
299     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
300       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
301       Destroy_LU(N, &lu->grid, &lu->LUstruct);
302       lu->options.Fact = SamePattern;
303     }
304     nz = 0; jB = 0; irow = mat->rstart;
305     for ( i=0; i<m; i++ ) {
306       lu->row[i] = nz;
307       countA = ai[i+1] - ai[i];
308       countB = bi[i+1] - bi[i];
309       ajj = aj + ai[i];  /* ptr to the beginning of this row */
310       bjj = bj + bi[i];
311 
312       /* B part, smaller col index */
313       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
314       for (j=0; j<countB; j++){
315         jcol = garray[bjj[j]];
316         if (jcol > colA_start) {
317           jB = j;
318           break;
319         }
320         lu->col[nz] = jcol;
321         lu->val[nz++] = *bv++;
322         if (j==countB-1) jB = countB;
323       }
324 
325       /* A part */
326       for (j=0; j<countA; j++){
327         lu->col[nz] = mat->rstart + ajj[j];
328         lu->val[nz++] = *av++;
329       }
330 
331       /* B part, larger col index */
332       for (j=jB; j<countB; j++){
333         lu->col[nz] = garray[bjj[j]];
334         lu->val[nz++] = *bv++;
335       }
336     }
337     lu->row[m] = nz;
338 #if defined(PETSC_USE_COMPLEX)
339     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
340 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
341 #else
342     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
343 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
344 #endif
345   }
346   if (lu->StatPrint) {
347     ierr = PetscGetTime(&time[0]);CHKERRQ(ierr);
348     time0[0] = time[0] - time0[0];
349   }
350 
351   /* Factor the matrix. */
352   PStatInit(&stat);   /* Initialize the statistics variables. */
353 
354   if (lu->StatPrint) {
355     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
356     ierr = PetscGetTime(&time0[1]);CHKERRQ(ierr);
357   }
358 
359   if (lu->MatInputMode == GLOBAL) { /* global mat input */
360 #if defined(PETSC_USE_COMPLEX)
361     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
362                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
363 #else
364     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
365                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
366 #endif
367   } else { /* distributed mat input */
368 #if defined(PETSC_USE_COMPLEX)
369     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
370 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
371     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
372 #else
373     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
374 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
375     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
376 #endif
377   }
378   if (lu->StatPrint) {
379     ierr = PetscGetTime(&time[1]);CHKERRQ(ierr);  /* to be removed */
380     time0[1] = time[1] - time0[1];
381     if (lu->StatPrint) PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
382   }
383   PStatFree(&stat);
384 
385   if (lu->MatInputMode == GLOBAL && size > 1){
386     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
387   }
388 
389   if (lu->StatPrint) {
390     ierr = MPI_Reduce(time0,time_max,2,MPI_DOUBLE,MPI_MAX,0,A->comm);
391     ierr = MPI_Reduce(time0,time_min,2,MPI_DOUBLE,MPI_MIN,0,A->comm);
392     ierr = MPI_Reduce(time0,time,2,MPI_DOUBLE,MPI_SUM,0,A->comm);
393     for (i=0; i<2; i++) time[i] = time[i]/size; /* average time */
394     ierr = PetscPrintf(A->comm, "  Time for mat conversion (max/min/avg):    %g / %g / %g\n",time_max[0],time_min[0],time[0]);
395     ierr = PetscPrintf(A->comm, "  Time for superlu_dist fact (max/min/avg): %g / %g / %g\n\n",time_max[1],time_min[1],time[1]);
396   }
397   (*F)->assembled = PETSC_TRUE;
398   lu->flg         = SAME_NONZERO_PATTERN;
399   PetscFunctionReturn(0);
400 }
401 
402 /* Note the Petsc r and c permutations are ignored */
403 #undef __FUNCT__
404 #define __FUNCT__ "MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST"
405 int MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
406 {
407   Mat                     B;
408   Mat_MPIAIJ_SuperLU_DIST *lu;
409   int                     ierr,M=A->M,N=A->N,size;
410   superlu_options_t       options;
411   char                    buff[32];
412   PetscTruth              flg;
413   char                    *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
414   char                    *prtype[] = {"LargeDiag","NATURAL"};
415   PetscFunctionBegin;
416 
417   /* Create the factorization matrix */
418   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
419   ierr = MatSetType(B,MATSUPERLU_DIST);CHKERRQ(ierr);
420   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
421   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
422 
423   B->ops->lufactornumeric  = MatLUFactorNumeric_MPIAIJ_SuperLU_DIST;
424   B->ops->solve            = MatSolve_MPIAIJ_SuperLU_DIST;
425   B->factor                = FACTOR_LU;
426 
427   lu = (Mat_MPIAIJ_SuperLU_DIST*)(B->spptr);
428 
429   /* Set the input options */
430   set_default_options(&options);
431   lu->MatInputMode = GLOBAL;
432   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
433 
434   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
435   lu->nprow = size/2;               /* Default process rows.      */
436   if (lu->nprow == 0) lu->nprow = 1;
437   lu->npcol = size/lu->nprow;           /* Default process columns.   */
438 
439   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
440 
441     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
442     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
443     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
444 
445     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
446     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
447 
448     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
449     if (!flg) {
450       options.Equil = NO;
451     }
452 
453     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],buff,32,&flg);CHKERRQ(ierr);
454     while (flg) {
455       ierr = PetscStrcmp(buff,"LargeDiag",&flg);CHKERRQ(ierr);
456       if (flg) {
457         options.RowPerm = LargeDiag;
458         break;
459       }
460       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
461       if (flg) {
462         options.RowPerm = NOROWPERM;
463         break;
464       }
465       SETERRQ1(1,"Unknown row permutation %s",buff);
466     }
467 
468     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],buff,32,&flg);CHKERRQ(ierr);
469     while (flg) {
470       ierr = PetscStrcmp(buff,"MMD_AT_PLUS_A",&flg);CHKERRQ(ierr);
471       if (flg) {
472         options.ColPerm = MMD_AT_PLUS_A;
473         break;
474       }
475       ierr = PetscStrcmp(buff,"NATURAL",&flg);CHKERRQ(ierr);
476       if (flg) {
477         options.ColPerm = NATURAL;
478         break;
479       }
480       ierr = PetscStrcmp(buff,"MMD_ATA",&flg);CHKERRQ(ierr);
481       if (flg) {
482         options.ColPerm = MMD_ATA;
483         break;
484       }
485       ierr = PetscStrcmp(buff,"COLAMD",&flg);CHKERRQ(ierr);
486       if (flg) {
487         options.ColPerm = COLAMD;
488         break;
489       }
490       SETERRQ1(1,"Unknown column permutation %s",buff);
491     }
492 
493     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
494     if (!flg) {
495       options.ReplaceTinyPivot = NO;
496     }
497 
498     options.IterRefine = NOREFINE;
499     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
500     if (flg) {
501       options.IterRefine = DOUBLE;
502     }
503 
504     if (PetscLogPrintInfo) {
505       lu->StatPrint = (int)PETSC_TRUE;
506     } else {
507       lu->StatPrint = (int)PETSC_FALSE;
508     }
509     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
510                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
511   PetscOptionsEnd();
512 
513   /* Initialize the SuperLU process grid. */
514   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
515 
516   /* Initialize ScalePermstruct and LUstruct. */
517   ScalePermstructInit(M, N, &lu->ScalePermstruct);
518   LUstructInit(M, N, &lu->LUstruct);
519 
520   lu->options            = options;
521   lu->flg                = DIFFERENT_NONZERO_PATTERN;
522   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
523   *F = B;
524   PetscFunctionReturn(0);
525 }
526 
527 #undef __FUNCT__
528 #define __FUNCT__ "MatAssemblyEnd_MPIAIJ_SuperLU_DIST"
529 int MatAssemblyEnd_MPIAIJ_SuperLU_DIST(Mat A,MatAssemblyType mode) {
530   int                     ierr;
531   Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr);
532 
533   PetscFunctionBegin;
534   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
535   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
536   A->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST;
537   PetscFunctionReturn(0);
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatMPIAIJFactorInfo_SuperLu"
542 int MatMPIAIJFactorInfo_SuperLu(Mat A,PetscViewer viewer)
543 {
544   Mat_MPIAIJ_SuperLU_DIST *lu= (Mat_MPIAIJ_SuperLU_DIST*)A->spptr;
545   superlu_options_t       options;
546   int                     ierr;
547   char                    *colperm;
548 
549   PetscFunctionBegin;
550   /* check if matrix is superlu_dist type */
551   if (A->ops->solve != MatSolve_MPIAIJ_SuperLU_DIST) PetscFunctionReturn(0);
552 
553   options = lu->options;
554   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
555   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
556   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
557   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
558   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
559   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
560   if (options.ColPerm == NATURAL) {
561     colperm = "NATURAL";
562   } else if (options.ColPerm == MMD_AT_PLUS_A) {
563     colperm = "MMD_AT_PLUS_A";
564   } else if (options.ColPerm == MMD_ATA) {
565     colperm = "MMD_ATA";
566   } else if (options.ColPerm == COLAMD) {
567     colperm = "COLAMD";
568   } else {
569     SETERRQ(1,"Unknown column permutation");
570   }
571   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
572   PetscFunctionReturn(0);
573 }
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatView_MPIAIJ_Spooles_DIST"
577 int MatView_MPIAIJ_SuperLU_DIST(Mat A,PetscViewer viewer)
578 {
579   int                     ierr;
580   PetscTruth              isascii;
581   PetscViewerFormat       format;
582   Mat_MPIAIJ_SuperLU_DIST *lu=(Mat_MPIAIJ_SuperLU_DIST*)(A->spptr);
583 
584   PetscFunctionBegin;
585   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
586 
587   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
588   if (isascii) {
589     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
590     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
591       ierr = MatMPIAIJFactorInfo_SuperLu(A,viewer);CHKERRQ(ierr);
592     }
593   }
594   PetscFunctionReturn(0);
595 }
596 
597 
598 EXTERN_C_BEGIN
599 #undef __FUNCT__
600 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
601 int MatConvert_Base_SuperLU_DIST(Mat A,MatType type,Mat *newmat) {
602   /* This routine is only called to convert to MATSUPERLU_DIST */
603   /* from MATSEQAIJ if A has a single process communicator */
604   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
605   int                     ierr,size;
606   MPI_Comm                comm;
607   Mat                     B=*newmat;
608   Mat_MPIAIJ_SuperLU_DIST *lu;
609 
610   PetscFunctionBegin;
611   if (B != A) {
612     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
613   }
614 
615   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
616   ierr = PetscNew(Mat_MPIAIJ_SuperLU_DIST,&lu);CHKERRQ(ierr);
617 
618   lu->MatView              = A->ops->view;
619   lu->MatAssemblyEnd       = A->ops->assemblyend;
620   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
621   lu->MatDestroy           = A->ops->destroy;
622   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
623 
624   B->spptr                 = (void*)lu;
625   B->ops->view             = MatView_MPIAIJ_SuperLU_DIST;
626   B->ops->assemblyend      = MatAssemblyEnd_MPIAIJ_SuperLU_DIST;
627   B->ops->lufactorsymbolic = MatLUFactorSymbolic_MPIAIJ_SuperLU_DIST;
628   B->ops->destroy          = MatDestroy_MPIAIJ_SuperLU_DIST;
629   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
630   if (size == 1) {
631     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
632                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
633     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
634                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
635   } else {
636     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
637                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
638     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
639                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
640   }
641   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
642   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
643   *newmat = B;
644   PetscFunctionReturn(0);
645 }
646 EXTERN_C_END
647 
648 /*MC
649   MATSUPERLU_DIST - a matrix type providing direct solvers (LU) for parallel matrices
650   via the external package SuperLU_DIST.
651 
652   If SuperLU_DIST is installed (see the manual for
653   instructions on how to declare the existence of external packages),
654   a matrix type can be constructed which invokes SuperLU_DIST solvers.
655   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
656   This matrix type is only supported for double precision real.
657 
658   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
659   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
660   MatSeqAIJSetPreallocation is supported, and similarly MatMPISBAIJSetPreallocation is supported
661   for communicators controlling multiple processes.  It is recommended that you call both of
662   the above preallocation routines for simplicity.
663 
664   Options Database Keys:
665 + -mat_type superlu_dist
666 . -mat_superlu_dist_r <n> - number of rows in processor partition
667 . -mat_superlu_dist_c <n> - number of columns in processor partition
668 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
669 . -mat_superlu_dist_equil - equilibrate the matrix
670 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
671 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
672 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
673 . -mat_superlu_dist_iterrefine - use iterative refinement
674 - -mat_superlu_dist_statprint - print factorization information
675 
676    Level: beginner
677 
678 .seealso: PCLU
679 M*/
680 
681 EXTERN_C_BEGIN
682 #undef __FUNCT__
683 #define __FUNCT__ "MatCreate_MPIAIJ_SuperLU_DIST"
684 int MatCreate_MPIAIJ_SuperLU_DIST(Mat A) {
685   int                     ierr,size;
686   MPI_Comm                comm;
687 
688   PetscFunctionBegin;
689   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
690   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
691   if (size == 1) {
692     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
693   } else {
694     ierr = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
695   }
696   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
697   PetscFunctionReturn(0);
698 }
699 EXTERN_C_END
700 
701 EXTERN_C_BEGIN
702 #undef __FUNCT__
703 #define __FUNCT__ "MatLoad_MPIAIJ_SuperLU_DIST"
704 int MatLoad_MPIAIJ_SuperLU_DIST(PetscViewer viewer,MatType type,Mat *A) {
705   int      ierr,size,(*r)(PetscViewer,MatType,Mat*);
706   MPI_Comm comm;
707 
708   PetscFunctionBegin;
709   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
710   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
711   if (size == 1) {
712     ierr = PetscFListFind(comm,MatLoadList,MATSEQAIJ,(void(**)(void))&r);CHKERRQ(ierr);
713   } else {
714     ierr = PetscFListFind(comm,MatLoadList,MATMPIAIJ,(void(**)(void))&r);CHKERRQ(ierr);
715   }
716   ierr = (*r)(viewer,type,A);CHKERRQ(ierr);
717   PetscFunctionReturn(0);
718 }
719 EXTERN_C_END
720