xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision f7c6e6aaaf12910997c044fca6674251bc2ff4e3)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   MatStructure            flg;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
79   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
80   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
81 
82   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
83   *newmat = B;
84   PetscFunctionReturn(0);
85 }
86 EXTERN_C_END
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
91 {
92   PetscErrorCode   ierr;
93   PetscMPIInt      size;
94   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
95 
96   PetscFunctionBegin;
97   if (lu->CleanUpSuperLU_Dist) {
98     /* Deallocate SuperLU_DIST storage */
99     if (lu->MatInputMode == GLOBAL) {
100       Destroy_CompCol_Matrix_dist(&lu->A_sup);
101     } else {
102       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
103       if ( lu->options.SolveInitialized ) {
104 #if defined(PETSC_USE_COMPLEX)
105         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #else
107         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
108 #endif
109       }
110     }
111     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
112     ScalePermstructFree(&lu->ScalePermstruct);
113     LUstructFree(&lu->LUstruct);
114 
115     /* Release the SuperLU_DIST process grid. */
116     superlu_gridexit(&lu->grid);
117 
118     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
119   }
120 
121   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
122   if (size == 1) {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   } else {
125     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
126   }
127   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "MatSolve_SuperLU_DIST"
133 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
134 {
135   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
136   PetscErrorCode   ierr;
137   PetscMPIInt      size;
138   PetscInt         m=A->M, N=A->N;
139   SuperLUStat_t    stat;
140   double           berr[1];
141   PetscScalar      *bptr;
142   PetscInt         info, nrhs=1;
143   Vec              x_seq;
144   IS               iden;
145   VecScatter       scat;
146 
147   PetscFunctionBegin;
148   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
149   if (size > 1) {
150     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
151       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
152       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
153       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
154       ierr = ISDestroy(iden);CHKERRQ(ierr);
155 
156       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
157       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
158       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
159     } else { /* distributed mat input */
160       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
161       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
162     }
163   } else { /* size == 1 */
164     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
165     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
166   }
167 
168   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
169 
170   PStatInit(&stat);        /* Initialize the statistics variables. */
171   if (lu->MatInputMode == GLOBAL) {
172 #if defined(PETSC_USE_COMPLEX)
173     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
174                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
175 #else
176     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
177                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
178 #endif
179   } else { /* distributed mat input */
180 #if defined(PETSC_USE_COMPLEX)
181     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
182 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
183     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
184 #else
185     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
186 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
187     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
188 #endif
189   }
190   if (lu->options.PrintStat) {
191      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
192   }
193   PStatFree(&stat);
194 
195   if (size > 1) {
196     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
197       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
198       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
199       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
200       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
201       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
202     } else {
203       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
204     }
205   } else {
206     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
207   }
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
213 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
214 {
215   Mat              *tseq,A_seq = PETSC_NULL;
216   Mat_SeqAIJ       *aa,*bb;
217   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
218   PetscErrorCode   ierr;
219   PetscInt         M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
220                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
221   PetscMPIInt      size,rank;
222   SuperLUStat_t    stat;
223   double           *berr=0;
224   IS               isrow;
225   PetscLogDouble   time0,time,time_min,time_max;
226   Mat              F_diag=PETSC_NULL;
227 #if defined(PETSC_USE_COMPLEX)
228   doublecomplex    *av, *bv;
229 #else
230   double           *av, *bv;
231 #endif
232 
233   PetscFunctionBegin;
234   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
235   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
236 
237   if (lu->options.PrintStat) { /* collect time for mat conversion */
238     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
239     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
240   }
241 
242   if (lu->MatInputMode == GLOBAL) { /* global mat input */
243     if (size > 1) { /* convert mpi A to seq mat A */
244       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
245       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
246       ierr = ISDestroy(isrow);CHKERRQ(ierr);
247 
248       A_seq = *tseq;
249       ierr = PetscFree(tseq);CHKERRQ(ierr);
250       aa =  (Mat_SeqAIJ*)A_seq->data;
251     } else {
252       aa =  (Mat_SeqAIJ*)A->data;
253     }
254 
255     /* Convert Petsc NR matrix to SuperLU_DIST NC.
256        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
257     if (lu->flg == SAME_NONZERO_PATTERN) {/* successive numeric factorization, sparsity pattern is reused. */
258       Destroy_CompCol_Matrix_dist(&lu->A_sup);
259       Destroy_LU(N, &lu->grid, &lu->LUstruct);
260       lu->options.Fact = SamePattern;
261     }
262 #if defined(PETSC_USE_COMPLEX)
263     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
264 #else
265     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
266 #endif
267 
268     /* Create compressed column matrix A_sup. */
269 #if defined(PETSC_USE_COMPLEX)
270     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
271 #else
272     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
273 #endif
274   } else { /* distributed mat input */
275     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
276     aa=(Mat_SeqAIJ*)(mat->A)->data;
277     bb=(Mat_SeqAIJ*)(mat->B)->data;
278     ai=aa->i; aj=aa->j;
279     bi=bb->i; bj=bb->j;
280 #if defined(PETSC_USE_COMPLEX)
281     av=(doublecomplex*)aa->a;
282     bv=(doublecomplex*)bb->a;
283 #else
284     av=aa->a;
285     bv=bb->a;
286 #endif
287     rstart = mat->rstart;
288     nz     = aa->nz + bb->nz;
289     garray = mat->garray;
290     rstart = mat->rstart;
291 
292     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
293 #if defined(PETSC_USE_COMPLEX)
294       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
295 #else
296       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
297 #endif
298     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
299       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
300       Destroy_LU(N, &lu->grid, &lu->LUstruct);
301       lu->options.Fact = SamePattern;
302     }
303     nz = 0; irow = mat->rstart;
304     for ( i=0; i<m; i++ ) {
305       lu->row[i] = nz;
306       countA = ai[i+1] - ai[i];
307       countB = bi[i+1] - bi[i];
308       ajj = aj + ai[i];  /* ptr to the beginning of this row */
309       bjj = bj + bi[i];
310 
311       /* B part, smaller col index */
312       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
313       jB = 0;
314       for (j=0; j<countB; j++){
315         jcol = garray[bjj[j]];
316         if (jcol > colA_start) {
317           jB = j;
318           break;
319         }
320         lu->col[nz] = jcol;
321         lu->val[nz++] = *bv++;
322         if (j==countB-1) jB = countB;
323       }
324 
325       /* A part */
326       for (j=0; j<countA; j++){
327         lu->col[nz] = mat->rstart + ajj[j];
328         lu->val[nz++] = *av++;
329       }
330 
331       /* B part, larger col index */
332       for (j=jB; j<countB; j++){
333         lu->col[nz] = garray[bjj[j]];
334         lu->val[nz++] = *bv++;
335       }
336     }
337     lu->row[m] = nz;
338 #if defined(PETSC_USE_COMPLEX)
339     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
340 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
341 #else
342     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
343 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
344 #endif
345   }
346   if (lu->options.PrintStat) {
347     ierr = PetscGetTime(&time);CHKERRQ(ierr);
348     time0 = time - time0;
349   }
350 
351   /* Factor the matrix. */
352   PStatInit(&stat);   /* Initialize the statistics variables. */
353 
354   if (lu->MatInputMode == GLOBAL) { /* global mat input */
355 #if defined(PETSC_USE_COMPLEX)
356     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
357                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
358 #else
359     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
360                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
361 #endif
362   } else { /* distributed mat input */
363 #if defined(PETSC_USE_COMPLEX)
364     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
365 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
366     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
367 #else
368     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
369 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
370     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
371 #endif
372   }
373 
374   if (lu->MatInputMode == GLOBAL && size > 1){
375     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
376   }
377 
378   if (lu->options.PrintStat) {
379     if (size > 1){
380       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
381       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
382       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
383       time = time/size; /* average time */
384       if (!rank)
385         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
386                               %g / %g / %g\n",time_max,time_min,time);
387     } else {
388       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
389                               %g\n",time0);
390     }
391 
392     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
393   }
394   PStatFree(&stat);
395   if (size > 1){
396     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
397     F_diag->assembled = PETSC_TRUE;
398   }
399   (*F)->assembled   = PETSC_TRUE;
400   lu->flg           = SAME_NONZERO_PATTERN;
401   PetscFunctionReturn(0);
402 }
403 
404 /* Note the Petsc r and c permutations are ignored */
405 #undef __FUNCT__
406 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
407 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
408 {
409   Mat               B;
410   Mat_SuperLU_DIST  *lu;
411   PetscErrorCode    ierr;
412   PetscInt          M=A->M,N=A->N,indx;
413   PetscMPIInt       size;
414   superlu_options_t options;
415   PetscTruth        flg;
416   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
417   const char        *prtype[] = {"LargeDiag","NATURAL"};
418 
419   PetscFunctionBegin;
420   /* Create the factorization matrix */
421   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
422   ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr);
423   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
424   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
425   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
426 
427   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
428   B->ops->solve            = MatSolve_SuperLU_DIST;
429   B->factor                = FACTOR_LU;
430 
431   lu = (Mat_SuperLU_DIST*)(B->spptr);
432 
433   /* Set the input options */
434   set_default_options_dist(&options);
435 
436   lu->MatInputMode = GLOBAL;
437   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
438 
439   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
440   lu->nprow = size/2;               /* Default process rows.      */
441   if (!lu->nprow) lu->nprow = 1;
442   lu->npcol = size/lu->nprow;           /* Default process columns.   */
443 
444   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
445 
446     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
447     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
448     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
449 
450     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
451     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
452 
453     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
454     if (!flg) {
455       options.Equil = NO;
456     }
457 
458     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
459     if (flg) {
460       switch (indx) {
461       case 0:
462         options.RowPerm = LargeDiag;
463         break;
464       case 1:
465         options.RowPerm = NOROWPERM;
466         break;
467       }
468     }
469 
470     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
471     if (flg) {
472       switch (indx) {
473       case 0:
474         options.ColPerm = MMD_AT_PLUS_A;
475         break;
476       case 1:
477         options.ColPerm = NATURAL;
478         break;
479       case 2:
480         options.ColPerm = MMD_ATA;
481         break;
482       }
483     }
484 
485     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
486     if (!flg) {
487       options.ReplaceTinyPivot = NO;
488     }
489 
490     options.IterRefine = NOREFINE;
491     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
492     if (flg) {
493       options.IterRefine = DOUBLE;
494     }
495 
496     if (PetscLogPrintInfo) {
497       options.PrintStat = YES;
498     } else {
499       options.PrintStat = NO;
500     }
501     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
502                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
503   PetscOptionsEnd();
504 
505   /* Initialize the SuperLU process grid. */
506   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
507 
508   /* Initialize ScalePermstruct and LUstruct. */
509   ScalePermstructInit(M, N, &lu->ScalePermstruct);
510   LUstructInit(M, N, &lu->LUstruct);
511 
512   lu->options            = options;
513   lu->flg                = DIFFERENT_NONZERO_PATTERN;
514   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
515   *F = B;
516 
517   PetscFunctionReturn(0);
518 }
519 
520 #undef __FUNCT__
521 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
522 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
523   PetscErrorCode   ierr;
524   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
525 
526   PetscFunctionBegin;
527   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
528   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
529   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
530   PetscFunctionReturn(0);
531 }
532 
533 #undef __FUNCT__
534 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
535 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
536 {
537   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
538   superlu_options_t options;
539   PetscErrorCode    ierr;
540 
541   PetscFunctionBegin;
542   /* check if matrix is superlu_dist type */
543   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
544 
545   options = lu->options;
546   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
547   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
548   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
549   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
550   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
551   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
552   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
553   if (options.ColPerm == NATURAL) {
554     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
555   } else if (options.ColPerm == MMD_AT_PLUS_A) {
556     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
557   } else if (options.ColPerm == MMD_ATA) {
558     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
559   } else {
560     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
561   }
562   PetscFunctionReturn(0);
563 }
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatView_SuperLU_DIST"
567 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
568 {
569   PetscErrorCode    ierr;
570   PetscTruth        iascii;
571   PetscViewerFormat format;
572   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
573 
574   PetscFunctionBegin;
575   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
576 
577   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
578   if (iascii) {
579     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
580     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
581       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
582     }
583   }
584   PetscFunctionReturn(0);
585 }
586 
587 
588 EXTERN_C_BEGIN
589 #undef __FUNCT__
590 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
591 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
592 {
593   /* This routine is only called to convert to MATSUPERLU_DIST */
594   /* from MATSEQAIJ if A has a single process communicator */
595   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
596   PetscErrorCode   ierr;
597   PetscMPIInt      size;
598   MPI_Comm         comm;
599   Mat              B=*newmat;
600   Mat_SuperLU_DIST *lu;
601 
602   PetscFunctionBegin;
603   if (reuse == MAT_INITIAL_MATRIX) {
604     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
605   }
606 
607   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
608   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
609 
610   lu->MatDuplicate         = A->ops->duplicate;
611   lu->MatView              = A->ops->view;
612   lu->MatAssemblyEnd       = A->ops->assemblyend;
613   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
614   lu->MatDestroy           = A->ops->destroy;
615   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
616 
617   B->spptr                 = (void*)lu;
618   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
619   B->ops->view             = MatView_SuperLU_DIST;
620   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
621   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
622   B->ops->destroy          = MatDestroy_SuperLU_DIST;
623   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
624   if (size == 1) {
625     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
626                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
627     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
628                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
629   } else {
630     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
631                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
632     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
633                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
634   }
635   ierr = PetscVerboseInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr);
636   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
637   *newmat = B;
638   PetscFunctionReturn(0);
639 }
640 EXTERN_C_END
641 
642 #undef __FUNCT__
643 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
644 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
645   PetscErrorCode   ierr;
646   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
647 
648   PetscFunctionBegin;
649   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
650   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 /*MC
655   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
656   via the external package SuperLU_DIST.
657 
658   If SuperLU_DIST is installed (see the manual for
659   instructions on how to declare the existence of external packages),
660   a matrix type can be constructed which invokes SuperLU_DIST solvers.
661   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
662 
663   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
664   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
665   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
666   for communicators controlling multiple processes.  It is recommended that you call both of
667   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
668   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
669   without data copy.
670 
671   Options Database Keys:
672 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
673 . -mat_superlu_dist_r <n> - number of rows in processor partition
674 . -mat_superlu_dist_c <n> - number of columns in processor partition
675 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
676 . -mat_superlu_dist_equil - equilibrate the matrix
677 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
678 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
679 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
680 . -mat_superlu_dist_iterrefine - use iterative refinement
681 - -mat_superlu_dist_statprint - print factorization information
682 
683    Level: beginner
684 
685 .seealso: PCLU
686 M*/
687 
688 EXTERN_C_BEGIN
689 #undef __FUNCT__
690 #define __FUNCT__ "MatCreate_SuperLU_DIST"
691 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
692 {
693   PetscErrorCode ierr;
694   PetscMPIInt    size;
695   Mat            A_diag;
696 
697   PetscFunctionBegin;
698   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
699   /*   and SuperLU_DIST types */
700   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
701   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
702   if (size == 1) {
703     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
704   } else {
705     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
706     A_diag = ((Mat_MPIAIJ *)A->data)->A;
707     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
708   }
709   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 EXTERN_C_END
713 
714