xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision c0aa2d19b955bae1d38d9dc490b50fe48a12d3f7)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   MatStructure            flg;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
79   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
80   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
81 
82   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
83   *newmat = B;
84   PetscFunctionReturn(0);
85 }
86 EXTERN_C_END
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
91 {
92   PetscErrorCode   ierr;
93   PetscMPIInt      size;
94   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
95 
96   PetscFunctionBegin;
97   if (lu->CleanUpSuperLU_Dist) {
98     /* Deallocate SuperLU_DIST storage */
99     if (lu->MatInputMode == GLOBAL) {
100       Destroy_CompCol_Matrix_dist(&lu->A_sup);
101     } else {
102       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
103       if ( lu->options.SolveInitialized ) {
104 #if defined(PETSC_USE_COMPLEX)
105         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #else
107         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
108 #endif
109       }
110     }
111     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
112     ScalePermstructFree(&lu->ScalePermstruct);
113     LUstructFree(&lu->LUstruct);
114 
115     /* Release the SuperLU_DIST process grid. */
116     superlu_gridexit(&lu->grid);
117 
118     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
119   }
120 
121   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
122   if (size == 1) {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   } else {
125     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
126   }
127   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
128 
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSolve_SuperLU_DIST"
134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
135 {
136   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
137   PetscErrorCode   ierr;
138   PetscMPIInt      size;
139   PetscInt         m=A->M, N=A->N;
140   SuperLUStat_t    stat;
141   double           berr[1];
142   PetscScalar      *bptr;
143   PetscInt         info, nrhs=1;
144   Vec              x_seq;
145   IS               iden;
146   VecScatter       scat;
147 
148   PetscFunctionBegin;
149   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
150   if (size > 1) {
151     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
152       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
153       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
154       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
155       ierr = ISDestroy(iden);CHKERRQ(ierr);
156 
157       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
158       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
159       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
160     } else { /* distributed mat input */
161       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
162       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
163     }
164   } else { /* size == 1 */
165     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
166     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
167   }
168 
169   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
170 
171   PStatInit(&stat);        /* Initialize the statistics variables. */
172   if (lu->MatInputMode == GLOBAL) {
173 #if defined(PETSC_USE_COMPLEX)
174     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
175                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
176 #else
177     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
178                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
179 #endif
180   } else { /* distributed mat input */
181 #if defined(PETSC_USE_COMPLEX)
182     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
183 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
184     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
185 #else
186     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
187 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
188     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
189 #endif
190   }
191   if (lu->options.PrintStat) {
192      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
193   }
194   PStatFree(&stat);
195 
196   if (size > 1) {
197     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
198       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
199       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
200       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
201       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
202       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
203     } else {
204       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
205     }
206   } else {
207     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
208   }
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
215 {
216   Mat              *tseq,A_seq = PETSC_NULL;
217   Mat_SeqAIJ       *aa,*bb;
218   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
219   PetscErrorCode   ierr;
220   PetscInt         M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
221                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
222   PetscMPIInt      size,rank;
223   SuperLUStat_t    stat;
224   double           *berr=0;
225   IS               isrow;
226   PetscLogDouble   time0,time,time_min,time_max;
227 #if defined(PETSC_USE_COMPLEX)
228   doublecomplex    *av, *bv;
229 #else
230   double           *av, *bv;
231 #endif
232 
233   PetscFunctionBegin;
234   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
235   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
236 
237   if (lu->options.PrintStat) { /* collect time for mat conversion */
238     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
239     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
240   }
241 
242   if (lu->MatInputMode == GLOBAL) { /* global mat input */
243     if (size > 1) { /* convert mpi A to seq mat A */
244       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
245       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
246       ierr = ISDestroy(isrow);CHKERRQ(ierr);
247 
248       A_seq = *tseq;
249       ierr = PetscFree(tseq);CHKERRQ(ierr);
250       aa =  (Mat_SeqAIJ*)A_seq->data;
251     } else {
252       aa =  (Mat_SeqAIJ*)A->data;
253     }
254 
255     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
256     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
257 #if defined(PETSC_USE_COMPLEX)
258       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
259 #else
260       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
261 #endif
262     } else { /* successive numeric factorization, sparsity pattern is reused. */
263       Destroy_CompCol_Matrix_dist(&lu->A_sup);
264       Destroy_LU(N, &lu->grid, &lu->LUstruct);
265       lu->options.Fact = SamePattern;
266     }
267 #if defined(PETSC_USE_COMPLEX)
268     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
269 #else
270     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
271 #endif
272 
273     /* Create compressed column matrix A_sup. */
274 #if defined(PETSC_USE_COMPLEX)
275     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
276 #else
277     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
278 #endif
279   } else { /* distributed mat input */
280     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
281     aa=(Mat_SeqAIJ*)(mat->A)->data;
282     bb=(Mat_SeqAIJ*)(mat->B)->data;
283     ai=aa->i; aj=aa->j;
284     bi=bb->i; bj=bb->j;
285 #if defined(PETSC_USE_COMPLEX)
286     av=(doublecomplex*)aa->a;
287     bv=(doublecomplex*)bb->a;
288 #else
289     av=aa->a;
290     bv=bb->a;
291 #endif
292     rstart = mat->rstart;
293     nz     = aa->nz + bb->nz;
294     garray = mat->garray;
295     rstart = mat->rstart;
296 
297     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
298 #if defined(PETSC_USE_COMPLEX)
299       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
300 #else
301       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
302 #endif
303     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
304       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
305       Destroy_LU(N, &lu->grid, &lu->LUstruct);
306       lu->options.Fact = SamePattern;
307     }
308     nz = 0; irow = mat->rstart;
309     for ( i=0; i<m; i++ ) {
310       lu->row[i] = nz;
311       countA = ai[i+1] - ai[i];
312       countB = bi[i+1] - bi[i];
313       ajj = aj + ai[i];  /* ptr to the beginning of this row */
314       bjj = bj + bi[i];
315 
316       /* B part, smaller col index */
317       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
318       jB = 0;
319       for (j=0; j<countB; j++){
320         jcol = garray[bjj[j]];
321         if (jcol > colA_start) {
322           jB = j;
323           break;
324         }
325         lu->col[nz] = jcol;
326         lu->val[nz++] = *bv++;
327         if (j==countB-1) jB = countB;
328       }
329 
330       /* A part */
331       for (j=0; j<countA; j++){
332         lu->col[nz] = mat->rstart + ajj[j];
333         lu->val[nz++] = *av++;
334       }
335 
336       /* B part, larger col index */
337       for (j=jB; j<countB; j++){
338         lu->col[nz] = garray[bjj[j]];
339         lu->val[nz++] = *bv++;
340       }
341     }
342     lu->row[m] = nz;
343 #if defined(PETSC_USE_COMPLEX)
344     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
345 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
346 #else
347     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
348 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
349 #endif
350   }
351   if (lu->options.PrintStat) {
352     ierr = PetscGetTime(&time);CHKERRQ(ierr);
353     time0 = time - time0;
354   }
355 
356   /* Factor the matrix. */
357   PStatInit(&stat);   /* Initialize the statistics variables. */
358 
359   if (lu->MatInputMode == GLOBAL) { /* global mat input */
360 #if defined(PETSC_USE_COMPLEX)
361     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
362                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
363 #else
364     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
365                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
366 #endif
367   } else { /* distributed mat input */
368 #if defined(PETSC_USE_COMPLEX)
369     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
370 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
371     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
372 #else
373     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
374 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
375     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
376 #endif
377   }
378 
379   if (lu->MatInputMode == GLOBAL && size > 1){
380     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
381   }
382 
383   if (lu->options.PrintStat) {
384     if (size > 1){
385       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
386       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
387       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
388       time = time/size; /* average time */
389       if (!rank)
390         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
391                               %g / %g / %g\n",time_max,time_min,time);
392     } else {
393       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
394                               %g\n",time0);
395     }
396 
397     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
398   }
399   PStatFree(&stat);
400   (*F)->assembled = PETSC_TRUE;
401   lu->flg         = SAME_NONZERO_PATTERN;
402   PetscFunctionReturn(0);
403 }
404 
405 /* Note the Petsc r and c permutations are ignored */
406 #undef __FUNCT__
407 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
408 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
409 {
410   Mat               B;
411   Mat_SuperLU_DIST  *lu;
412   PetscErrorCode    ierr;
413   PetscInt          M=A->M,N=A->N,indx;
414   PetscMPIInt       size;
415   superlu_options_t options;
416   PetscTruth        flg;
417   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
418   const char        *prtype[] = {"LargeDiag","NATURAL"};
419 
420   PetscFunctionBegin;
421   /* Create the factorization matrix */
422   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
423   ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr);
424   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
425   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
426   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
427 
428   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
429   B->ops->solve            = MatSolve_SuperLU_DIST;
430   B->factor                = FACTOR_LU;
431 
432   lu = (Mat_SuperLU_DIST*)(B->spptr);
433 
434   /* Set the input options */
435   set_default_options_dist(&options);
436 
437   lu->MatInputMode = GLOBAL;
438   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
439 
440   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
441   lu->nprow = size/2;               /* Default process rows.      */
442   if (!lu->nprow) lu->nprow = 1;
443   lu->npcol = size/lu->nprow;           /* Default process columns.   */
444 
445   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
446 
447     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
448     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
449     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
450 
451     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
452     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
453 
454     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
455     if (!flg) {
456       options.Equil = NO;
457     }
458 
459     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
460     if (flg) {
461       switch (indx) {
462       case 0:
463         options.RowPerm = LargeDiag;
464         break;
465       case 1:
466         options.RowPerm = NOROWPERM;
467         break;
468       }
469     }
470 
471     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
472     if (flg) {
473       switch (indx) {
474       case 0:
475         options.ColPerm = MMD_AT_PLUS_A;
476         break;
477       case 1:
478         options.ColPerm = NATURAL;
479         break;
480       case 2:
481         options.ColPerm = MMD_ATA;
482         break;
483       case 3:
484         options.ColPerm = COLAMD;
485         break;
486       }
487     }
488 
489     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
490     if (!flg) {
491       options.ReplaceTinyPivot = NO;
492     }
493 
494     options.IterRefine = NOREFINE;
495     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
496     if (flg) {
497       options.IterRefine = DOUBLE;
498     }
499 
500     if (PetscLogPrintInfo) {
501       options.PrintStat = YES;
502     } else {
503       options.PrintStat = NO;
504     }
505     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
506                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
507   PetscOptionsEnd();
508 
509   /* Initialize the SuperLU process grid. */
510   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
511 
512   /* Initialize ScalePermstruct and LUstruct. */
513   ScalePermstructInit(M, N, &lu->ScalePermstruct);
514   LUstructInit(M, N, &lu->LUstruct);
515 
516   lu->options            = options;
517   lu->flg                = DIFFERENT_NONZERO_PATTERN;
518   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
519   *F = B;
520 
521   PetscFunctionReturn(0);
522 }
523 
524 #undef __FUNCT__
525 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
526 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
527   PetscErrorCode   ierr;
528   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
529 
530   PetscFunctionBegin;
531   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
532   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
533   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
534   PetscFunctionReturn(0);
535 }
536 
537 #undef __FUNCT__
538 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
539 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
540 {
541   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
542   superlu_options_t options;
543   PetscErrorCode    ierr;
544 
545   PetscFunctionBegin;
546   /* check if matrix is superlu_dist type */
547   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
548 
549   options = lu->options;
550   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
551   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
552   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
553   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
554   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
555   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
556   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
557   if (options.ColPerm == NATURAL) {
558     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
559   } else if (options.ColPerm == MMD_AT_PLUS_A) {
560     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
561   } else if (options.ColPerm == MMD_ATA) {
562     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
563   } else if (options.ColPerm == COLAMD) {
564     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation COLAMD\n");CHKERRQ(ierr);
565   } else {
566     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatView_SuperLU_DIST"
573 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
574 {
575   PetscErrorCode    ierr;
576   PetscTruth        iascii;
577   PetscViewerFormat format;
578   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
579 
580   PetscFunctionBegin;
581   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
582 
583   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
584   if (iascii) {
585     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
586     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
587       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
588     }
589   }
590   PetscFunctionReturn(0);
591 }
592 
593 
594 EXTERN_C_BEGIN
595 #undef __FUNCT__
596 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
597 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
598 {
599   /* This routine is only called to convert to MATSUPERLU_DIST */
600   /* from MATSEQAIJ if A has a single process communicator */
601   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
602   PetscErrorCode   ierr;
603   PetscMPIInt      size;
604   MPI_Comm         comm;
605   Mat              B=*newmat;
606   Mat_SuperLU_DIST *lu;
607 
608   PetscFunctionBegin;
609   if (reuse == MAT_INITIAL_MATRIX) {
610     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
611   }
612 
613   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
614   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
615 
616   lu->MatDuplicate         = A->ops->duplicate;
617   lu->MatView              = A->ops->view;
618   lu->MatAssemblyEnd       = A->ops->assemblyend;
619   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
620   lu->MatDestroy           = A->ops->destroy;
621   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
622 
623   B->spptr                 = (void*)lu;
624   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
625   B->ops->view             = MatView_SuperLU_DIST;
626   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
627   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
628   B->ops->destroy          = MatDestroy_SuperLU_DIST;
629   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
630   if (size == 1) {
631     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
632                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
633     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
634                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
635   } else {
636     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
637                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
638     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
639                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
640   }
641   ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr);
642   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
643   *newmat = B;
644   PetscFunctionReturn(0);
645 }
646 EXTERN_C_END
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
650 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
651   PetscErrorCode   ierr;
652   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
653 
654   PetscFunctionBegin;
655   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
656   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659 
660 /*MC
661   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
662   via the external package SuperLU_DIST.
663 
664   If SuperLU_DIST is installed (see the manual for
665   instructions on how to declare the existence of external packages),
666   a matrix type can be constructed which invokes SuperLU_DIST solvers.
667   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
668 
669   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
670   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
671   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
672   for communicators controlling multiple processes.  It is recommended that you call both of
673   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
674   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
675   without data copy.
676 
677   Options Database Keys:
678 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
679 . -mat_superlu_dist_r <n> - number of rows in processor partition
680 . -mat_superlu_dist_c <n> - number of columns in processor partition
681 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
682 . -mat_superlu_dist_equil - equilibrate the matrix
683 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
684 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
685 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
686 . -mat_superlu_dist_iterrefine - use iterative refinement
687 - -mat_superlu_dist_statprint - print factorization information
688 
689    Level: beginner
690 
691 .seealso: PCLU
692 M*/
693 
694 EXTERN_C_BEGIN
695 #undef __FUNCT__
696 #define __FUNCT__ "MatCreate_SuperLU_DIST"
697 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
698 {
699   PetscErrorCode ierr;
700   PetscMPIInt    size;
701   Mat            A_diag;
702 
703   PetscFunctionBegin;
704   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
705   /*   and SuperLU_DIST types */
706   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
707   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
708   if (size == 1) {
709     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
710   } else {
711     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
712     A_diag = ((Mat_MPIAIJ *)A->data)->A;
713     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
714   }
715   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 EXTERN_C_END
719 
720