xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 00ff2a266771cbcf97b488d2ea0f5039d5ce3159)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   MatStructure            flg;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
79   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
80   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
81 
82   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
83   *newmat = B;
84   PetscFunctionReturn(0);
85 }
86 EXTERN_C_END
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
91 {
92   PetscErrorCode   ierr;
93   PetscMPIInt      size;
94   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
95 
96   PetscFunctionBegin;
97   if (lu->CleanUpSuperLU_Dist) {
98     /* Deallocate SuperLU_DIST storage */
99     if (lu->MatInputMode == GLOBAL) {
100       Destroy_CompCol_Matrix_dist(&lu->A_sup);
101     } else {
102       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
103       if ( lu->options.SolveInitialized ) {
104 #if defined(PETSC_USE_COMPLEX)
105         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #else
107         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
108 #endif
109       }
110     }
111     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
112     ScalePermstructFree(&lu->ScalePermstruct);
113     LUstructFree(&lu->LUstruct);
114 
115     /* Release the SuperLU_DIST process grid. */
116     superlu_gridexit(&lu->grid);
117 
118     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
119   }
120 
121   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
122   if (size == 1) {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   } else {
125     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
126   }
127   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "MatSolve_SuperLU_DIST"
133 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
134 {
135   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
136   PetscErrorCode   ierr;
137   PetscMPIInt      size;
138   PetscInt         m=A->rmap.N, N=A->cmap.N;
139   SuperLUStat_t    stat;
140   double           berr[1];
141   PetscScalar      *bptr;
142   PetscInt         info, nrhs=1;
143   Vec              x_seq;
144   IS               iden;
145   VecScatter       scat;
146 
147   PetscFunctionBegin;
148   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
149   if (size > 1) {
150     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
151       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
152       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
153       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
154       ierr = ISDestroy(iden);CHKERRQ(ierr);
155 
156       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
157       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
158       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
159     } else { /* distributed mat input */
160       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
161       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
162     }
163   } else { /* size == 1 */
164     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
165     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
166   }
167 
168   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
169 
170   PStatInit(&stat);        /* Initialize the statistics variables. */
171   if (lu->MatInputMode == GLOBAL) {
172 #if defined(PETSC_USE_COMPLEX)
173     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
174                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
175 #else
176     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
177                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
178 #endif
179   } else { /* distributed mat input */
180 #if defined(PETSC_USE_COMPLEX)
181     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
182 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
183     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
184 #else
185     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
186 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
187     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
188 #endif
189   }
190   if (lu->options.PrintStat) {
191      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
192   }
193   PStatFree(&stat);
194 
195   if (size > 1) {
196     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
197       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
198       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
199       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
200       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
201       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
202     } else {
203       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
204     }
205   } else {
206     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
207   }
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
213 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
214 {
215   Mat              *tseq,A_seq = PETSC_NULL;
216   Mat_SeqAIJ       *aa,*bb;
217   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
218   PetscErrorCode   ierr;
219   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
220                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
221   PetscMPIInt      size,rank;
222   SuperLUStat_t    stat;
223   double           *berr=0;
224   IS               isrow;
225   PetscLogDouble   time0,time,time_min,time_max;
226   Mat              F_diag=PETSC_NULL;
227 #if defined(PETSC_USE_COMPLEX)
228   doublecomplex    *av, *bv;
229 #else
230   double           *av, *bv;
231 #endif
232 
233   PetscFunctionBegin;
234   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
235   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
236 
237   if (lu->options.PrintStat) { /* collect time for mat conversion */
238     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
239     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
240   }
241 
242   if (lu->MatInputMode == GLOBAL) { /* global mat input */
243     if (size > 1) { /* convert mpi A to seq mat A */
244       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
245       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
246       ierr = ISDestroy(isrow);CHKERRQ(ierr);
247 
248       A_seq = *tseq;
249       ierr = PetscFree(tseq);CHKERRQ(ierr);
250       aa =  (Mat_SeqAIJ*)A_seq->data;
251     } else {
252       aa =  (Mat_SeqAIJ*)A->data;
253     }
254 
255     /* Convert Petsc NR matrix to SuperLU_DIST NC.
256        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
257     if (lu->flg == SAME_NONZERO_PATTERN) {/* successive numeric factorization, sparsity pattern is reused. */
258       Destroy_CompCol_Matrix_dist(&lu->A_sup);
259       Destroy_LU(N, &lu->grid, &lu->LUstruct);
260       lu->options.Fact = SamePattern;
261     }
262 #if defined(PETSC_USE_COMPLEX)
263     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
264 #else
265     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
266 #endif
267 
268     /* Create compressed column matrix A_sup. */
269 #if defined(PETSC_USE_COMPLEX)
270     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
271 #else
272     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
273 #endif
274   } else { /* distributed mat input */
275     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
276     aa=(Mat_SeqAIJ*)(mat->A)->data;
277     bb=(Mat_SeqAIJ*)(mat->B)->data;
278     ai=aa->i; aj=aa->j;
279     bi=bb->i; bj=bb->j;
280 #if defined(PETSC_USE_COMPLEX)
281     av=(doublecomplex*)aa->a;
282     bv=(doublecomplex*)bb->a;
283 #else
284     av=aa->a;
285     bv=bb->a;
286 #endif
287     rstart = A->rmap.rstart;
288     nz     = aa->nz + bb->nz;
289     garray = mat->garray;
290 
291     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
292 #if defined(PETSC_USE_COMPLEX)
293       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
294 #else
295       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
296 #endif
297     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
298       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
299       Destroy_LU(N, &lu->grid, &lu->LUstruct);
300       lu->options.Fact = SamePattern;
301     }
302     nz = 0; irow = rstart;
303     for ( i=0; i<m; i++ ) {
304       lu->row[i] = nz;
305       countA = ai[i+1] - ai[i];
306       countB = bi[i+1] - bi[i];
307       ajj = aj + ai[i];  /* ptr to the beginning of this row */
308       bjj = bj + bi[i];
309 
310       /* B part, smaller col index */
311       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
312       jB = 0;
313       for (j=0; j<countB; j++){
314         jcol = garray[bjj[j]];
315         if (jcol > colA_start) {
316           jB = j;
317           break;
318         }
319         lu->col[nz] = jcol;
320         lu->val[nz++] = *bv++;
321         if (j==countB-1) jB = countB;
322       }
323 
324       /* A part */
325       for (j=0; j<countA; j++){
326         lu->col[nz] = rstart + ajj[j];
327         lu->val[nz++] = *av++;
328       }
329 
330       /* B part, larger col index */
331       for (j=jB; j<countB; j++){
332         lu->col[nz] = garray[bjj[j]];
333         lu->val[nz++] = *bv++;
334       }
335     }
336     lu->row[m] = nz;
337 #if defined(PETSC_USE_COMPLEX)
338     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
339 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
340 #else
341     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
342 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
343 #endif
344   }
345   if (lu->options.PrintStat) {
346     ierr = PetscGetTime(&time);CHKERRQ(ierr);
347     time0 = time - time0;
348   }
349 
350   /* Factor the matrix. */
351   PStatInit(&stat);   /* Initialize the statistics variables. */
352 
353   if (lu->MatInputMode == GLOBAL) { /* global mat input */
354 #if defined(PETSC_USE_COMPLEX)
355     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
356                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
357 #else
358     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
359                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
360 #endif
361   } else { /* distributed mat input */
362 #if defined(PETSC_USE_COMPLEX)
363     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
364 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
365     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
366 #else
367     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
368 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
369     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
370 #endif
371   }
372 
373   if (lu->MatInputMode == GLOBAL && size > 1){
374     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
375   }
376 
377   if (lu->options.PrintStat) {
378     if (size > 1){
379       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
380       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
381       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
382       time = time/size; /* average time */
383       if (!rank)
384         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
385                               %g / %g / %g\n",time_max,time_min,time);
386     } else {
387       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
388                               %g\n",time0);
389     }
390 
391     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
392   }
393   PStatFree(&stat);
394   if (size > 1){
395     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
396     F_diag->assembled = PETSC_TRUE;
397   }
398   (*F)->assembled   = PETSC_TRUE;
399   lu->flg           = SAME_NONZERO_PATTERN;
400   PetscFunctionReturn(0);
401 }
402 
403 /* Note the Petsc r and c permutations are ignored */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
406 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
407 {
408   Mat               B;
409   Mat_SuperLU_DIST  *lu;
410   PetscErrorCode    ierr;
411   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
412   PetscMPIInt       size;
413   superlu_options_t options;
414   PetscTruth        flg;
415   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
416   const char        *prtype[] = {"LargeDiag","NATURAL"};
417 
418   PetscFunctionBegin;
419   /* Create the factorization matrix */
420   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
421   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
422   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
423   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
424   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
425 
426   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
427   B->ops->solve            = MatSolve_SuperLU_DIST;
428   B->factor                = FACTOR_LU;
429 
430   lu = (Mat_SuperLU_DIST*)(B->spptr);
431 
432   /* Set the input options */
433   set_default_options_dist(&options);
434 
435   lu->MatInputMode = GLOBAL;
436   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
437 
438   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
439   lu->nprow = size/2;               /* Default process rows.      */
440   if (!lu->nprow) lu->nprow = 1;
441   lu->npcol = size/lu->nprow;           /* Default process columns.   */
442 
443   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
444 
445     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
446     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
447     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
448 
449     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
450     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
451 
452     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
453     if (!flg) {
454       options.Equil = NO;
455     }
456 
457     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
458     if (flg) {
459       switch (indx) {
460       case 0:
461         options.RowPerm = LargeDiag;
462         break;
463       case 1:
464         options.RowPerm = NOROWPERM;
465         break;
466       }
467     }
468 
469     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
470     if (flg) {
471       switch (indx) {
472       case 0:
473         options.ColPerm = MMD_AT_PLUS_A;
474         break;
475       case 1:
476         options.ColPerm = NATURAL;
477         break;
478       case 2:
479         options.ColPerm = MMD_ATA;
480         break;
481       }
482     }
483 
484     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
485     if (!flg) {
486       options.ReplaceTinyPivot = NO;
487     }
488 
489     options.IterRefine = NOREFINE;
490     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
491     if (flg) {
492       options.IterRefine = DOUBLE;
493     }
494 
495     if (PetscLogPrintInfo) {
496       options.PrintStat = YES;
497     } else {
498       options.PrintStat = NO;
499     }
500     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
501                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
502   PetscOptionsEnd();
503 
504   /* Initialize the SuperLU process grid. */
505   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
506 
507   /* Initialize ScalePermstruct and LUstruct. */
508   ScalePermstructInit(M, N, &lu->ScalePermstruct);
509   LUstructInit(M, N, &lu->LUstruct);
510 
511   lu->options            = options;
512   lu->flg                = DIFFERENT_NONZERO_PATTERN;
513   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
514   *F = B;
515 
516   PetscFunctionReturn(0);
517 }
518 
519 #undef __FUNCT__
520 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
521 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
522   PetscErrorCode   ierr;
523   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
524 
525   PetscFunctionBegin;
526   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
527   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
528   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
529   PetscFunctionReturn(0);
530 }
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
534 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
535 {
536   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
537   superlu_options_t options;
538   PetscErrorCode    ierr;
539 
540   PetscFunctionBegin;
541   /* check if matrix is superlu_dist type */
542   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
543 
544   options = lu->options;
545   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
546   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
547   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
548   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
549   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
550   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
551   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
552   if (options.ColPerm == NATURAL) {
553     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
554   } else if (options.ColPerm == MMD_AT_PLUS_A) {
555     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
556   } else if (options.ColPerm == MMD_ATA) {
557     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
558   } else {
559     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
560   }
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatView_SuperLU_DIST"
566 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
567 {
568   PetscErrorCode    ierr;
569   PetscTruth        iascii;
570   PetscViewerFormat format;
571   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
572 
573   PetscFunctionBegin;
574   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
575 
576   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
577   if (iascii) {
578     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
579     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
580       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
581     }
582   }
583   PetscFunctionReturn(0);
584 }
585 
586 
587 EXTERN_C_BEGIN
588 #undef __FUNCT__
589 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
590 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
591 {
592   /* This routine is only called to convert to MATSUPERLU_DIST */
593   /* from MATSEQAIJ if A has a single process communicator */
594   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
595   PetscErrorCode   ierr;
596   PetscMPIInt      size;
597   MPI_Comm         comm;
598   Mat              B=*newmat;
599   Mat_SuperLU_DIST *lu;
600 
601   PetscFunctionBegin;
602   if (reuse == MAT_INITIAL_MATRIX) {
603     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
604   }
605 
606   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
607   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
608 
609   lu->MatDuplicate         = A->ops->duplicate;
610   lu->MatView              = A->ops->view;
611   lu->MatAssemblyEnd       = A->ops->assemblyend;
612   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
613   lu->MatDestroy           = A->ops->destroy;
614   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
615 
616   B->spptr                 = (void*)lu;
617   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
618   B->ops->view             = MatView_SuperLU_DIST;
619   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
620   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
621   B->ops->destroy          = MatDestroy_SuperLU_DIST;
622   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
623   if (size == 1) {
624     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
625                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
626     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
627                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
628   } else {
629     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
630                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
631     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
632                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
633   }
634   ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
635   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
636   *newmat = B;
637   PetscFunctionReturn(0);
638 }
639 EXTERN_C_END
640 
641 #undef __FUNCT__
642 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
643 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
644   PetscErrorCode   ierr;
645   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
646 
647   PetscFunctionBegin;
648   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
649   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
650   PetscFunctionReturn(0);
651 }
652 
653 /*MC
654   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
655   via the external package SuperLU_DIST.
656 
657   If SuperLU_DIST is installed (see the manual for
658   instructions on how to declare the existence of external packages),
659   a matrix type can be constructed which invokes SuperLU_DIST solvers.
660   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
661 
662   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
663   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
664   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
665   for communicators controlling multiple processes.  It is recommended that you call both of
666   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
667   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
668   without data copy.
669 
670   Options Database Keys:
671 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
672 . -mat_superlu_dist_r <n> - number of rows in processor partition
673 . -mat_superlu_dist_c <n> - number of columns in processor partition
674 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
675 . -mat_superlu_dist_equil - equilibrate the matrix
676 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
677 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
678 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
679 . -mat_superlu_dist_iterrefine - use iterative refinement
680 - -mat_superlu_dist_statprint - print factorization information
681 
682    Level: beginner
683 
684 .seealso: PCLU
685 M*/
686 
687 EXTERN_C_BEGIN
688 #undef __FUNCT__
689 #define __FUNCT__ "MatCreate_SuperLU_DIST"
690 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
691 {
692   PetscErrorCode ierr;
693   PetscMPIInt    size;
694 
695   PetscFunctionBegin;
696   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
697   /*   and SuperLU_DIST types */
698   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
699   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
700   if (size == 1) {
701     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
702   } else {
703     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
704     /*  A_diag = 0x0 ???  -- do we need it?
705     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
706     ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
707     */
708   }
709   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
710   PetscFunctionReturn(0);
711 }
712 EXTERN_C_END
713 
714