xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 2f59403fd2df48e396e6efa2e07d12d9da4cd99d)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   MatStructure            flg;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
76   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
79 
80   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
81   *newmat = B;
82   PetscFunctionReturn(0);
83 }
84 EXTERN_C_END
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
88 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
89 {
90   PetscErrorCode   ierr;
91   PetscMPIInt      size;
92   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
93 
94   PetscFunctionBegin;
95   if (lu->CleanUpSuperLU_Dist) {
96     /* Deallocate SuperLU_DIST storage */
97     if (lu->MatInputMode == GLOBAL) {
98       Destroy_CompCol_Matrix_dist(&lu->A_sup);
99     } else {
100       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
101       if ( lu->options.SolveInitialized ) {
102 #if defined(PETSC_USE_COMPLEX)
103         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
104 #else
105         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #endif
107       }
108     }
109     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
110     ScalePermstructFree(&lu->ScalePermstruct);
111     LUstructFree(&lu->LUstruct);
112 
113     /* Release the SuperLU_DIST process grid. */
114     superlu_gridexit(&lu->grid);
115 
116     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
117   }
118 
119   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
120   if (size == 1) {
121     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
122   } else {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   }
125   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 #undef __FUNCT__
130 #define __FUNCT__ "MatSolve_SuperLU_DIST"
131 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
132 {
133   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
134   PetscErrorCode   ierr;
135   PetscMPIInt      size;
136   PetscInt         m=A->rmap.N, N=A->cmap.N;
137   SuperLUStat_t    stat;
138   double           berr[1];
139   PetscScalar      *bptr;
140   PetscInt         info, nrhs=1;
141   Vec              x_seq;
142   IS               iden;
143   VecScatter       scat;
144 
145   PetscFunctionBegin;
146   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
147   if (size > 1) {
148     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
149       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
150       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
151       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
152       ierr = ISDestroy(iden);CHKERRQ(ierr);
153 
154       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
155       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
156       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
157     } else { /* distributed mat input */
158       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
159       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
160     }
161   } else { /* size == 1 */
162     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
163     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
164   }
165 
166   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
167 
168   PStatInit(&stat);        /* Initialize the statistics variables. */
169   if (lu->MatInputMode == GLOBAL) {
170 #if defined(PETSC_USE_COMPLEX)
171     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
172                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
173 #else
174     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
175                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
176 #endif
177   } else { /* distributed mat input */
178 #if defined(PETSC_USE_COMPLEX)
179     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
180 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
181     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
182 #else
183     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
184 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
185     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
186 #endif
187   }
188   if (lu->options.PrintStat) {
189      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
190   }
191   PStatFree(&stat);
192 
193   if (size > 1) {
194     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
195       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
196       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
197       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
198       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
199       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
200     } else {
201       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
202     }
203   } else {
204     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
205   }
206   PetscFunctionReturn(0);
207 }
208 
209 #undef __FUNCT__
210 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
211 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
212 {
213   Mat              *tseq,A_seq = PETSC_NULL;
214   Mat_SeqAIJ       *aa,*bb;
215   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
216   PetscErrorCode   ierr;
217   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
218                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
219   PetscMPIInt      size,rank;
220   SuperLUStat_t    stat;
221   double           *berr=0;
222   IS               isrow;
223   PetscLogDouble   time0,time,time_min,time_max;
224   Mat              F_diag=PETSC_NULL;
225 #if defined(PETSC_USE_COMPLEX)
226   doublecomplex    *av, *bv;
227 #else
228   double           *av, *bv;
229 #endif
230 
231   PetscFunctionBegin;
232   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
233   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
234 
235   if (lu->options.PrintStat) { /* collect time for mat conversion */
236     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
237     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
238   }
239 
240   if (lu->MatInputMode == GLOBAL) { /* global mat input */
241     if (size > 1) { /* convert mpi A to seq mat A */
242       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
243       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
244       ierr = ISDestroy(isrow);CHKERRQ(ierr);
245 
246       A_seq = *tseq;
247       ierr = PetscFree(tseq);CHKERRQ(ierr);
248       aa =  (Mat_SeqAIJ*)A_seq->data;
249     } else {
250       aa =  (Mat_SeqAIJ*)A->data;
251     }
252 
253     /* Convert Petsc NR matrix to SuperLU_DIST NC.
254        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
255     if (lu->flg == SAME_NONZERO_PATTERN) {/* successive numeric factorization, sparsity pattern is reused. */
256       Destroy_CompCol_Matrix_dist(&lu->A_sup);
257       Destroy_LU(N, &lu->grid, &lu->LUstruct);
258       lu->options.Fact = SamePattern;
259     }
260 #if defined(PETSC_USE_COMPLEX)
261     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
262 #else
263     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
264 #endif
265 
266     /* Create compressed column matrix A_sup. */
267 #if defined(PETSC_USE_COMPLEX)
268     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
269 #else
270     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
271 #endif
272   } else { /* distributed mat input */
273     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
274     aa=(Mat_SeqAIJ*)(mat->A)->data;
275     bb=(Mat_SeqAIJ*)(mat->B)->data;
276     ai=aa->i; aj=aa->j;
277     bi=bb->i; bj=bb->j;
278 #if defined(PETSC_USE_COMPLEX)
279     av=(doublecomplex*)aa->a;
280     bv=(doublecomplex*)bb->a;
281 #else
282     av=aa->a;
283     bv=bb->a;
284 #endif
285     rstart = A->rmap.rstart;
286     nz     = aa->nz + bb->nz;
287     garray = mat->garray;
288 
289     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
290 #if defined(PETSC_USE_COMPLEX)
291       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
292 #else
293       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
294 #endif
295     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
296       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
297       Destroy_LU(N, &lu->grid, &lu->LUstruct);
298       lu->options.Fact = SamePattern;
299     }
300     nz = 0; irow = rstart;
301     for ( i=0; i<m; i++ ) {
302       lu->row[i] = nz;
303       countA = ai[i+1] - ai[i];
304       countB = bi[i+1] - bi[i];
305       ajj = aj + ai[i];  /* ptr to the beginning of this row */
306       bjj = bj + bi[i];
307 
308       /* B part, smaller col index */
309       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
310       jB = 0;
311       for (j=0; j<countB; j++){
312         jcol = garray[bjj[j]];
313         if (jcol > colA_start) {
314           jB = j;
315           break;
316         }
317         lu->col[nz] = jcol;
318         lu->val[nz++] = *bv++;
319         if (j==countB-1) jB = countB;
320       }
321 
322       /* A part */
323       for (j=0; j<countA; j++){
324         lu->col[nz] = rstart + ajj[j];
325         lu->val[nz++] = *av++;
326       }
327 
328       /* B part, larger col index */
329       for (j=jB; j<countB; j++){
330         lu->col[nz] = garray[bjj[j]];
331         lu->val[nz++] = *bv++;
332       }
333     }
334     lu->row[m] = nz;
335 #if defined(PETSC_USE_COMPLEX)
336     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
337 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
338 #else
339     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
340 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
341 #endif
342   }
343   if (lu->options.PrintStat) {
344     ierr = PetscGetTime(&time);CHKERRQ(ierr);
345     time0 = time - time0;
346   }
347 
348   /* Factor the matrix. */
349   PStatInit(&stat);   /* Initialize the statistics variables. */
350 
351   if (lu->MatInputMode == GLOBAL) { /* global mat input */
352 #if defined(PETSC_USE_COMPLEX)
353     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
354                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
355 #else
356     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
357                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
358 #endif
359   } else { /* distributed mat input */
360 #if defined(PETSC_USE_COMPLEX)
361     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
362 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
363     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
364 #else
365     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
366 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
367     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
368 #endif
369   }
370 
371   if (lu->MatInputMode == GLOBAL && size > 1){
372     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
373   }
374 
375   if (lu->options.PrintStat) {
376     if (size > 1){
377       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
378       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
379       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
380       time = time/size; /* average time */
381       if (!rank)
382         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
383                               %g / %g / %g\n",time_max,time_min,time);
384     } else {
385       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
386                               %g\n",time0);
387     }
388 
389     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
390   }
391   PStatFree(&stat);
392   if (size > 1){
393     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
394     F_diag->assembled = PETSC_TRUE;
395   }
396   (*F)->assembled   = PETSC_TRUE;
397   lu->flg           = SAME_NONZERO_PATTERN;
398   PetscFunctionReturn(0);
399 }
400 
401 /* Note the Petsc r and c permutations are ignored */
402 #undef __FUNCT__
403 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
404 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
405 {
406   Mat               B;
407   Mat_SuperLU_DIST  *lu;
408   PetscErrorCode    ierr;
409   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
410   PetscMPIInt       size;
411   superlu_options_t options;
412   PetscTruth        flg;
413   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
414   const char        *prtype[] = {"LargeDiag","NATURAL"};
415 
416   PetscFunctionBegin;
417   /* Create the factorization matrix */
418   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
419   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
420   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
421   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
422   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
423 
424   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
425   B->ops->solve            = MatSolve_SuperLU_DIST;
426   B->factor                = FACTOR_LU;
427 
428   lu = (Mat_SuperLU_DIST*)(B->spptr);
429 
430   /* Set the input options */
431   set_default_options_dist(&options);
432 
433   lu->MatInputMode = GLOBAL;
434   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
435 
436   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
437   lu->nprow = size/2;               /* Default process rows.      */
438   if (!lu->nprow) lu->nprow = 1;
439   lu->npcol = size/lu->nprow;           /* Default process columns.   */
440 
441   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
442 
443     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
444     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
445     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
446 
447     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
448     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
449 
450     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
451     if (!flg) {
452       options.Equil = NO;
453     }
454 
455     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
456     if (flg) {
457       switch (indx) {
458       case 0:
459         options.RowPerm = LargeDiag;
460         break;
461       case 1:
462         options.RowPerm = NOROWPERM;
463         break;
464       }
465     }
466 
467     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
468     if (flg) {
469       switch (indx) {
470       case 0:
471         options.ColPerm = MMD_AT_PLUS_A;
472         break;
473       case 1:
474         options.ColPerm = NATURAL;
475         break;
476       case 2:
477         options.ColPerm = MMD_ATA;
478         break;
479       }
480     }
481 
482     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
483     if (!flg) {
484       options.ReplaceTinyPivot = NO;
485     }
486 
487     options.IterRefine = NOREFINE;
488     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
489     if (flg) {
490       options.IterRefine = DOUBLE;
491     }
492 
493     if (PetscLogPrintInfo) {
494       options.PrintStat = YES;
495     } else {
496       options.PrintStat = NO;
497     }
498     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
499                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
500   PetscOptionsEnd();
501 
502   /* Initialize the SuperLU process grid. */
503   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
504 
505   /* Initialize ScalePermstruct and LUstruct. */
506   ScalePermstructInit(M, N, &lu->ScalePermstruct);
507   LUstructInit(M, N, &lu->LUstruct);
508 
509   lu->options            = options;
510   lu->flg                = DIFFERENT_NONZERO_PATTERN;
511   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
512   *F = B;
513 
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
519 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
520   PetscErrorCode   ierr;
521   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
522 
523   PetscFunctionBegin;
524   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
525   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
526   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
527   PetscFunctionReturn(0);
528 }
529 
530 #undef __FUNCT__
531 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
532 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
533 {
534   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
535   superlu_options_t options;
536   PetscErrorCode    ierr;
537 
538   PetscFunctionBegin;
539   /* check if matrix is superlu_dist type */
540   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
541 
542   options = lu->options;
543   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
544   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
545   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
546   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
547   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
548   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
549   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
550   if (options.ColPerm == NATURAL) {
551     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
552   } else if (options.ColPerm == MMD_AT_PLUS_A) {
553     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
554   } else if (options.ColPerm == MMD_ATA) {
555     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
556   } else {
557     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
558   }
559   PetscFunctionReturn(0);
560 }
561 
562 #undef __FUNCT__
563 #define __FUNCT__ "MatView_SuperLU_DIST"
564 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
565 {
566   PetscErrorCode    ierr;
567   PetscTruth        iascii;
568   PetscViewerFormat format;
569   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
570 
571   PetscFunctionBegin;
572   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
573 
574   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
575   if (iascii) {
576     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
577     if (format == PETSC_VIEWER_ASCII_INFO) {
578       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
579     }
580   }
581   PetscFunctionReturn(0);
582 }
583 
584 
585 EXTERN_C_BEGIN
586 #undef __FUNCT__
587 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
588 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
589 {
590   /* This routine is only called to convert to MATSUPERLU_DIST */
591   /* from MATSEQAIJ if A has a single process communicator */
592   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
593   PetscErrorCode   ierr;
594   PetscMPIInt      size;
595   MPI_Comm         comm;
596   Mat              B=*newmat;
597   Mat_SuperLU_DIST *lu;
598 
599   PetscFunctionBegin;
600   if (reuse == MAT_INITIAL_MATRIX) {
601     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
602   }
603 
604   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
605   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
606 
607   lu->MatDuplicate         = A->ops->duplicate;
608   lu->MatView              = A->ops->view;
609   lu->MatAssemblyEnd       = A->ops->assemblyend;
610   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
611   lu->MatDestroy           = A->ops->destroy;
612   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
613 
614   B->spptr                 = (void*)lu;
615   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
616   B->ops->view             = MatView_SuperLU_DIST;
617   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
618   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
619   B->ops->destroy          = MatDestroy_SuperLU_DIST;
620   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
621   if (size == 1) {
622     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
623                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
624     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
625                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
626   } else {
627     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
628                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
629     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
630                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
631   }
632   ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
633   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
634   *newmat = B;
635   PetscFunctionReturn(0);
636 }
637 EXTERN_C_END
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
641 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
642   PetscErrorCode   ierr;
643   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
644 
645   PetscFunctionBegin;
646   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
647   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
648   PetscFunctionReturn(0);
649 }
650 
651 /*MC
652   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
653   via the external package SuperLU_DIST.
654 
655   If SuperLU_DIST is installed (see the manual for
656   instructions on how to declare the existence of external packages),
657   a matrix type can be constructed which invokes SuperLU_DIST solvers.
658   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
659 
660   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
661   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
662   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
663   for communicators controlling multiple processes.  It is recommended that you call both of
664   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
665   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
666   without data copy.
667 
668   Options Database Keys:
669 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
670 . -mat_superlu_dist_r <n> - number of rows in processor partition
671 . -mat_superlu_dist_c <n> - number of columns in processor partition
672 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
673 . -mat_superlu_dist_equil - equilibrate the matrix
674 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
675 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
676 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
677 . -mat_superlu_dist_iterrefine - use iterative refinement
678 - -mat_superlu_dist_statprint - print factorization information
679 
680    Level: beginner
681 
682 .seealso: PCLU
683 M*/
684 
685 EXTERN_C_BEGIN
686 #undef __FUNCT__
687 #define __FUNCT__ "MatCreate_SuperLU_DIST"
688 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
689 {
690   PetscErrorCode ierr;
691   PetscMPIInt    size;
692 
693   PetscFunctionBegin;
694   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
695   /*   and SuperLU_DIST types */
696   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
697   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
698   if (size == 1) {
699     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
700   } else {
701     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
702     /*  A_diag = 0x0 ???  -- do we need it?
703     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
704     ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
705     */
706   }
707   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 EXTERN_C_END
711 
712