xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42)
1 /*
2         Provides an interface to the SuperLU_DIST_2.0 sparse solver
3 */
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
8 #include "stdlib.h"
9 #endif
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "superlu_zdefs.h"
14 #else
15 #include "superlu_ddefs.h"
16 #endif
17 EXTERN_C_END
18 
19 typedef enum { GLOBAL,DISTRIBUTED
20 } SuperLU_MatInputMode;
21 
22 typedef struct {
23   int_t                   nprow,npcol,*row,*col;
24   gridinfo_t              grid;
25   superlu_options_t       options;
26   SuperMatrix             A_sup;
27   ScalePermstruct_t       ScalePermstruct;
28   LUstruct_t              LUstruct;
29   int                     StatPrint;
30   int                     MatInputMode;
31   SOLVEstruct_t           SOLVEstruct;
32   MatStructure            flg;
33   MPI_Comm                comm_superlu;
34 #if defined(PETSC_USE_COMPLEX)
35   doublecomplex           *val;
36 #else
37   double                  *val;
38 #endif
39 
40   /* A few function pointers for inheritance */
41   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
42   int (*MatView)(Mat,PetscViewer);
43   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
44   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
45   int (*MatDestroy)(Mat);
46 
47   /* Flag to clean up (non-global) SuperLU objects during Destroy */
48   PetscTruth CleanUpSuperLU_Dist;
49 } Mat_SuperLU_DIST;
50 
51 EXTERN int MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
52 
53 EXTERN_C_BEGIN
54 #undef __FUNCT__
55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
56 int MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) {
57   int              ierr;
58   Mat              B=*newmat;
59   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
60 
61   PetscFunctionBegin;
62   if (B != A) {
63     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
64   }
65   /* Reset the original function pointers */
66   B->ops->duplicate        = lu->MatDuplicate;
67   B->ops->view             = lu->MatView;
68   B->ops->assemblyend      = lu->MatAssemblyEnd;
69   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
70   B->ops->destroy          = lu->MatDestroy;
71 
72   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
73   ierr = PetscFree(lu);CHKERRQ(ierr);
74 
75   *newmat = B;
76   PetscFunctionReturn(0);
77 }
78 EXTERN_C_END
79 
80 #undef __FUNCT__
81 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
82 int MatDestroy_SuperLU_DIST(Mat A)
83 {
84   int              ierr,size;
85   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
86 
87   PetscFunctionBegin;
88   if (lu->CleanUpSuperLU_Dist) {
89     /* Deallocate SuperLU_DIST storage */
90     if (lu->MatInputMode == GLOBAL) {
91       Destroy_CompCol_Matrix_dist(&lu->A_sup);
92     } else {
93       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
94       if ( lu->options.SolveInitialized ) {
95 #if defined(PETSC_USE_COMPLEX)
96         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
97 #else
98         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
99 #endif
100       }
101     }
102     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
103     ScalePermstructFree(&lu->ScalePermstruct);
104     LUstructFree(&lu->LUstruct);
105 
106     /* Release the SuperLU_DIST process grid. */
107     superlu_gridexit(&lu->grid);
108 
109     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
110   }
111 
112   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
113   if (size == 1) {
114     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
115   } else {
116     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
117   }
118   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
119 
120   PetscFunctionReturn(0);
121 }
122 
123 #undef __FUNCT__
124 #define __FUNCT__ "MatSolve_SuperLU_DIST"
125 int MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
126 {
127   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
128   int              ierr, size;
129   int              m=A->M, N=A->N;
130   SuperLUStat_t    stat;
131   double           berr[1];
132   PetscScalar      *bptr;
133   int              info, nrhs=1;
134   Vec              x_seq;
135   IS               iden;
136   VecScatter       scat;
137 
138   PetscFunctionBegin;
139   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
140   if (size > 1) {
141     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
142       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
143       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
144       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
145       ierr = ISDestroy(iden);CHKERRQ(ierr);
146 
147       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
148       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
149       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
150     } else { /* distributed mat input */
151       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
152       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
153     }
154   } else { /* size == 1 */
155     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
156     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
157   }
158 
159   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
160 
161   PStatInit(&stat);        /* Initialize the statistics variables. */
162 
163   if (lu->MatInputMode == GLOBAL) {
164 #if defined(PETSC_USE_COMPLEX)
165     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
166                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
167 #else
168     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
169                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
170 #endif
171   } else { /* distributed mat input */
172 #if defined(PETSC_USE_COMPLEX)
173     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
174 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
175     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
176 #else
177     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
178 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
179     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
180 #endif
181   }
182   if (lu->StatPrint) {
183      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
184   }
185   PStatFree(&stat);
186 
187   if (size > 1) {
188     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
189       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
190       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
191       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
192       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
193       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
194     } else {
195       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
196     }
197   } else {
198     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
199   }
200   PetscFunctionReturn(0);
201 }
202 
203 #undef __FUNCT__
204 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
205 int MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
206 {
207   Mat              *tseq,A_seq = PETSC_NULL;
208   Mat_SeqAIJ       *aa,*bb;
209   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
210   int              M=A->M,N=A->N,info,ierr,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
211                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
212   SuperLUStat_t    stat;
213   double           *berr=0;
214   IS               isrow;
215   PetscLogDouble   time0,time,time_min,time_max;
216 #if defined(PETSC_USE_COMPLEX)
217   doublecomplex    *av, *bv;
218 #else
219   double           *av, *bv;
220 #endif
221 
222   PetscFunctionBegin;
223   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
224   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
225 
226   if (lu->StatPrint) { /* collect time for mat conversion */
227     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
228     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
229   }
230 
231   if (lu->MatInputMode == GLOBAL) { /* global mat input */
232     if (size > 1) { /* convert mpi A to seq mat A */
233       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
234       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
235       ierr = ISDestroy(isrow);CHKERRQ(ierr);
236 
237       A_seq = *tseq;
238       ierr = PetscFree(tseq);CHKERRQ(ierr);
239       aa =  (Mat_SeqAIJ*)A_seq->data;
240     } else {
241       aa =  (Mat_SeqAIJ*)A->data;
242     }
243 
244     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
245     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
246 #if defined(PETSC_USE_COMPLEX)
247       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
248 #else
249       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
250 #endif
251     } else { /* successive numeric factorization, sparsity pattern is reused. */
252       Destroy_CompCol_Matrix_dist(&lu->A_sup);
253       Destroy_LU(N, &lu->grid, &lu->LUstruct);
254       lu->options.Fact = SamePattern;
255     }
256 #if defined(PETSC_USE_COMPLEX)
257     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
258 #else
259     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
260 #endif
261 
262     /* Create compressed column matrix A_sup. */
263 #if defined(PETSC_USE_COMPLEX)
264     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
265 #else
266     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
267 #endif
268   } else { /* distributed mat input */
269     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
270     aa=(Mat_SeqAIJ*)(mat->A)->data;
271     bb=(Mat_SeqAIJ*)(mat->B)->data;
272     ai=aa->i; aj=aa->j;
273     bi=bb->i; bj=bb->j;
274 #if defined(PETSC_USE_COMPLEX)
275     av=(doublecomplex*)aa->a;
276     bv=(doublecomplex*)bb->a;
277 #else
278     av=aa->a;
279     bv=bb->a;
280 #endif
281     rstart = mat->rstart;
282     nz     = aa->nz + bb->nz;
283     garray = mat->garray;
284     rstart = mat->rstart;
285 
286     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
287 #if defined(PETSC_USE_COMPLEX)
288       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
289 #else
290       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
291 #endif
292     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
293       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
294       Destroy_LU(N, &lu->grid, &lu->LUstruct);
295       lu->options.Fact = SamePattern;
296     }
297     nz = 0; irow = mat->rstart;
298     for ( i=0; i<m; i++ ) {
299       lu->row[i] = nz;
300       countA = ai[i+1] - ai[i];
301       countB = bi[i+1] - bi[i];
302       ajj = aj + ai[i];  /* ptr to the beginning of this row */
303       bjj = bj + bi[i];
304 
305       /* B part, smaller col index */
306       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
307       jB = 0;
308       for (j=0; j<countB; j++){
309         jcol = garray[bjj[j]];
310         if (jcol > colA_start) {
311           jB = j;
312           break;
313         }
314         lu->col[nz] = jcol;
315         lu->val[nz++] = *bv++;
316         if (j==countB-1) jB = countB;
317       }
318 
319       /* A part */
320       for (j=0; j<countA; j++){
321         lu->col[nz] = mat->rstart + ajj[j];
322         lu->val[nz++] = *av++;
323       }
324 
325       /* B part, larger col index */
326       for (j=jB; j<countB; j++){
327         lu->col[nz] = garray[bjj[j]];
328         lu->val[nz++] = *bv++;
329       }
330     }
331     lu->row[m] = nz;
332 #if defined(PETSC_USE_COMPLEX)
333     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
334 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
335 #else
336     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
337 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
338 #endif
339   }
340   if (lu->StatPrint) {
341     ierr = PetscGetTime(&time);CHKERRQ(ierr);
342     time0 = time - time0;
343   }
344 
345   /* Factor the matrix. */
346   PStatInit(&stat);   /* Initialize the statistics variables. */
347 
348   if (lu->MatInputMode == GLOBAL) { /* global mat input */
349 #if defined(PETSC_USE_COMPLEX)
350     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
351                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
352 #else
353     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
354                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
355 #endif
356   } else { /* distributed mat input */
357 #if defined(PETSC_USE_COMPLEX)
358     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
359 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
360     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
361 #else
362     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
363 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
364     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
365 #endif
366   }
367 
368   if (lu->MatInputMode == GLOBAL && size > 1){
369     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
370   }
371 
372   if (lu->StatPrint) {
373     if (size > 1){
374       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
375       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
376       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
377       time = time/size; /* average time */
378       if (!rank)
379         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
380                               %g / %g / %g\n",time_max,time_min,time);
381     } else {
382       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
383                               %g\n",time0);
384     }
385 
386     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
387   }
388   PStatFree(&stat);
389   (*F)->assembled = PETSC_TRUE;
390   lu->flg         = SAME_NONZERO_PATTERN;
391   PetscFunctionReturn(0);
392 }
393 
394 /* Note the Petsc r and c permutations are ignored */
395 #undef __FUNCT__
396 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
397 int MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
398 {
399   Mat               B;
400   Mat_SuperLU_DIST  *lu;
401   int               ierr,M=A->M,N=A->N,size,indx;
402   superlu_options_t options;
403   PetscTruth        flg;
404   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
405   const char        *prtype[] = {"LargeDiag","NATURAL"};
406 
407   PetscFunctionBegin;
408   /* Create the factorization matrix */
409   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
410   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
411   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
412   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
413 
414   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
415   B->ops->solve            = MatSolve_SuperLU_DIST;
416   B->factor                = FACTOR_LU;
417 
418   lu = (Mat_SuperLU_DIST*)(B->spptr);
419 
420   /* Set the input options */
421   set_default_options_dist(&options);
422   lu->MatInputMode = GLOBAL;
423   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
424 
425   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
426   lu->nprow = size/2;               /* Default process rows.      */
427   if (!lu->nprow) lu->nprow = 1;
428   lu->npcol = size/lu->nprow;           /* Default process columns.   */
429 
430   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
431 
432     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
433     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
434     if (size != lu->nprow * lu->npcol) SETERRQ(1,"Number of processes should be equal to nprow*npcol");
435 
436     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
437     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
438 
439     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
440     if (!flg) {
441       options.Equil = NO;
442     }
443 
444     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
445     if (flg) {
446       switch (indx) {
447       case 0:
448         options.RowPerm = LargeDiag;
449         break;
450       case 1:
451         options.RowPerm = NOROWPERM;
452         break;
453       }
454     }
455 
456     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
457     if (flg) {
458       switch (indx) {
459       case 0:
460         options.ColPerm = MMD_AT_PLUS_A;
461         break;
462       case 1:
463         options.ColPerm = NATURAL;
464         break;
465       case 2:
466         options.ColPerm = MMD_ATA;
467         break;
468       case 3:
469         options.ColPerm = COLAMD;
470         break;
471       }
472     }
473 
474     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
475     if (!flg) {
476       options.ReplaceTinyPivot = NO;
477     }
478 
479     options.IterRefine = NOREFINE;
480     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
481     if (flg) {
482       options.IterRefine = DOUBLE;
483     }
484 
485     if (PetscLogPrintInfo) {
486       lu->StatPrint = (int)PETSC_TRUE;
487     } else {
488       lu->StatPrint = (int)PETSC_FALSE;
489     }
490     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
491                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
492   PetscOptionsEnd();
493 
494   /* Initialize the SuperLU process grid. */
495   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
496 
497   /* Initialize ScalePermstruct and LUstruct. */
498   ScalePermstructInit(M, N, &lu->ScalePermstruct);
499   LUstructInit(M, N, &lu->LUstruct);
500 
501   lu->options            = options;
502   lu->flg                = DIFFERENT_NONZERO_PATTERN;
503   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
504   *F = B;
505   PetscFunctionReturn(0);
506 }
507 
508 #undef __FUNCT__
509 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
510 int MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
511   int              ierr;
512   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
513 
514   PetscFunctionBegin;
515   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
516   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
517   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
518   PetscFunctionReturn(0);
519 }
520 
521 #undef __FUNCT__
522 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
523 int MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
524 {
525   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
526   superlu_options_t options;
527   int               ierr;
528   char              *colperm;
529 
530   PetscFunctionBegin;
531   /* check if matrix is superlu_dist type */
532   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
533 
534   options = lu->options;
535   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
536   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
537   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
538   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
539   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
540   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
541   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
542   if (options.ColPerm == NATURAL) {
543     colperm = "NATURAL";
544   } else if (options.ColPerm == MMD_AT_PLUS_A) {
545     colperm = "MMD_AT_PLUS_A";
546   } else if (options.ColPerm == MMD_ATA) {
547     colperm = "MMD_ATA";
548   } else if (options.ColPerm == COLAMD) {
549     colperm = "COLAMD";
550   } else {
551     SETERRQ(1,"Unknown column permutation");
552   }
553   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
554   PetscFunctionReturn(0);
555 }
556 
557 #undef __FUNCT__
558 #define __FUNCT__ "MatView_SuperLU_DIST"
559 int MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
560 {
561   int               ierr;
562   PetscTruth        iascii;
563   PetscViewerFormat format;
564   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
565 
566   PetscFunctionBegin;
567   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
568 
569   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
570   if (iascii) {
571     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
572     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
573       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
574     }
575   }
576   PetscFunctionReturn(0);
577 }
578 
579 
580 EXTERN_C_BEGIN
581 #undef __FUNCT__
582 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
583 int MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) {
584   /* This routine is only called to convert to MATSUPERLU_DIST */
585   /* from MATSEQAIJ if A has a single process communicator */
586   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
587   int              ierr, size;
588   MPI_Comm         comm;
589   Mat              B=*newmat;
590   Mat_SuperLU_DIST *lu;
591 
592   PetscFunctionBegin;
593   if (B != A) {
594     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
595   }
596 
597   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
598   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
599 
600   lu->MatDuplicate         = A->ops->duplicate;
601   lu->MatView              = A->ops->view;
602   lu->MatAssemblyEnd       = A->ops->assemblyend;
603   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
604   lu->MatDestroy           = A->ops->destroy;
605   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
606 
607   B->spptr                 = (void*)lu;
608   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
609   B->ops->view             = MatView_SuperLU_DIST;
610   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
611   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
612   B->ops->destroy          = MatDestroy_SuperLU_DIST;
613   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
614   if (size == 1) {
615     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
616                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
617     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
618                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
619   } else {
620     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
621                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
622     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
623                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
624   }
625   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
626   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
627   *newmat = B;
628   PetscFunctionReturn(0);
629 }
630 EXTERN_C_END
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
634 int MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
635   int              ierr;
636   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
637 
638   PetscFunctionBegin;
639   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
640   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
641   PetscFunctionReturn(0);
642 }
643 
644 /*MC
645   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
646   via the external package SuperLU_DIST.
647 
648   If SuperLU_DIST is installed (see the manual for
649   instructions on how to declare the existence of external packages),
650   a matrix type can be constructed which invokes SuperLU_DIST solvers.
651   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
652   This matrix type is only supported for double precision real.
653 
654   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
655   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
656   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
657   for communicators controlling multiple processes.  It is recommended that you call both of
658   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
659   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
660   without data copy.
661 
662   Options Database Keys:
663 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
664 . -mat_superlu_dist_r <n> - number of rows in processor partition
665 . -mat_superlu_dist_c <n> - number of columns in processor partition
666 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
667 . -mat_superlu_dist_equil - equilibrate the matrix
668 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
669 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
670 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
671 . -mat_superlu_dist_iterrefine - use iterative refinement
672 - -mat_superlu_dist_statprint - print factorization information
673 
674    Level: beginner
675 
676 .seealso: PCLU
677 M*/
678 
679 EXTERN_C_BEGIN
680 #undef __FUNCT__
681 #define __FUNCT__ "MatCreate_SuperLU_DIST"
682 int MatCreate_SuperLU_DIST(Mat A) {
683   int ierr,size;
684   Mat A_diag;
685 
686   PetscFunctionBegin;
687   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
688   /*   and SuperLU_DIST types */
689   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
690   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
691   if (size == 1) {
692     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
693   } else {
694     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
695     A_diag = ((Mat_MPIAIJ *)A->data)->A;
696     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr);
697   }
698   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
699   PetscFunctionReturn(0);
700 }
701 EXTERN_C_END
702 
703