xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision e09efc27b8d97beab4f87d56cc8cb4ed18475c45)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   MatStructure            flg;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
79   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
80   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
81 
82   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
83   *newmat = B;
84   PetscFunctionReturn(0);
85 }
86 EXTERN_C_END
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
91 {
92   PetscErrorCode   ierr;
93   PetscMPIInt      size;
94   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
95 
96   PetscFunctionBegin;
97   if (lu->CleanUpSuperLU_Dist) {
98     /* Deallocate SuperLU_DIST storage */
99     if (lu->MatInputMode == GLOBAL) {
100       Destroy_CompCol_Matrix_dist(&lu->A_sup);
101     } else {
102       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
103       if ( lu->options.SolveInitialized ) {
104 #if defined(PETSC_USE_COMPLEX)
105         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #else
107         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
108 #endif
109       }
110     }
111     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
112     ScalePermstructFree(&lu->ScalePermstruct);
113     LUstructFree(&lu->LUstruct);
114 
115     /* Release the SuperLU_DIST process grid. */
116     superlu_gridexit(&lu->grid);
117 
118     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
119   }
120 
121   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
122   if (size == 1) {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   } else {
125     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
126   }
127   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
128 
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSolve_SuperLU_DIST"
134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
135 {
136   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
137   PetscErrorCode   ierr;
138   PetscMPIInt      size;
139   PetscInt         m=A->M, N=A->N;
140   SuperLUStat_t    stat;
141   double           berr[1];
142   PetscScalar      *bptr;
143   PetscInt         info, nrhs=1;
144   Vec              x_seq;
145   IS               iden;
146   VecScatter       scat;
147 
148   PetscFunctionBegin;
149   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
150   if (size > 1) {
151     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
152       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
153       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
154       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
155       ierr = ISDestroy(iden);CHKERRQ(ierr);
156 
157       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
158       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
159       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
160     } else { /* distributed mat input */
161       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
162       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
163     }
164   } else { /* size == 1 */
165     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
166     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
167   }
168 
169   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
170 
171   PStatInit(&stat);        /* Initialize the statistics variables. */
172   if (lu->MatInputMode == GLOBAL) {
173 #if defined(PETSC_USE_COMPLEX)
174     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
175                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
176 #else
177     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
178                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
179 #endif
180   } else { /* distributed mat input */
181 #if defined(PETSC_USE_COMPLEX)
182     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
183 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
184     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
185 #else
186     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
187 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
188     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
189 #endif
190   }
191   if (lu->options.PrintStat) {
192      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
193   }
194   PStatFree(&stat);
195 
196   if (size > 1) {
197     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
198       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
199       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
200       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
201       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
202       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
203     } else {
204       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
205     }
206   } else {
207     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
208   }
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
215 {
216   Mat              *tseq,A_seq = PETSC_NULL;
217   Mat_SeqAIJ       *aa,*bb;
218   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
219   PetscErrorCode   ierr;
220   PetscInt         M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
221                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
222   PetscMPIInt      size,rank;
223   SuperLUStat_t    stat;
224   double           *berr=0;
225   IS               isrow;
226   PetscLogDouble   time0,time,time_min,time_max;
227   Mat              F_diag;
228 #if defined(PETSC_USE_COMPLEX)
229   doublecomplex    *av, *bv;
230 #else
231   double           *av, *bv;
232 #endif
233 
234   PetscFunctionBegin;
235   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
236   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
237 
238   if (lu->options.PrintStat) { /* collect time for mat conversion */
239     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
240     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
241   }
242 
243   if (lu->MatInputMode == GLOBAL) { /* global mat input */
244     if (size > 1) { /* convert mpi A to seq mat A */
245       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
246       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
247       ierr = ISDestroy(isrow);CHKERRQ(ierr);
248 
249       A_seq = *tseq;
250       ierr = PetscFree(tseq);CHKERRQ(ierr);
251       aa =  (Mat_SeqAIJ*)A_seq->data;
252     } else {
253       aa =  (Mat_SeqAIJ*)A->data;
254     }
255 
256     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
257     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
258 #if defined(PETSC_USE_COMPLEX)
259       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
260 #else
261       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
262 #endif
263     } else { /* successive numeric factorization, sparsity pattern is reused. */
264       Destroy_CompCol_Matrix_dist(&lu->A_sup);
265       Destroy_LU(N, &lu->grid, &lu->LUstruct);
266       lu->options.Fact = SamePattern;
267     }
268 #if defined(PETSC_USE_COMPLEX)
269     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
270 #else
271     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
272 #endif
273 
274     /* Create compressed column matrix A_sup. */
275 #if defined(PETSC_USE_COMPLEX)
276     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
277 #else
278     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
279 #endif
280   } else { /* distributed mat input */
281     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
282     aa=(Mat_SeqAIJ*)(mat->A)->data;
283     bb=(Mat_SeqAIJ*)(mat->B)->data;
284     ai=aa->i; aj=aa->j;
285     bi=bb->i; bj=bb->j;
286 #if defined(PETSC_USE_COMPLEX)
287     av=(doublecomplex*)aa->a;
288     bv=(doublecomplex*)bb->a;
289 #else
290     av=aa->a;
291     bv=bb->a;
292 #endif
293     rstart = mat->rstart;
294     nz     = aa->nz + bb->nz;
295     garray = mat->garray;
296     rstart = mat->rstart;
297 
298     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
299 #if defined(PETSC_USE_COMPLEX)
300       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #else
302       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
303 #endif
304     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
305       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
306       Destroy_LU(N, &lu->grid, &lu->LUstruct);
307       lu->options.Fact = SamePattern;
308     }
309     nz = 0; irow = mat->rstart;
310     for ( i=0; i<m; i++ ) {
311       lu->row[i] = nz;
312       countA = ai[i+1] - ai[i];
313       countB = bi[i+1] - bi[i];
314       ajj = aj + ai[i];  /* ptr to the beginning of this row */
315       bjj = bj + bi[i];
316 
317       /* B part, smaller col index */
318       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
319       jB = 0;
320       for (j=0; j<countB; j++){
321         jcol = garray[bjj[j]];
322         if (jcol > colA_start) {
323           jB = j;
324           break;
325         }
326         lu->col[nz] = jcol;
327         lu->val[nz++] = *bv++;
328         if (j==countB-1) jB = countB;
329       }
330 
331       /* A part */
332       for (j=0; j<countA; j++){
333         lu->col[nz] = mat->rstart + ajj[j];
334         lu->val[nz++] = *av++;
335       }
336 
337       /* B part, larger col index */
338       for (j=jB; j<countB; j++){
339         lu->col[nz] = garray[bjj[j]];
340         lu->val[nz++] = *bv++;
341       }
342     }
343     lu->row[m] = nz;
344 #if defined(PETSC_USE_COMPLEX)
345     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
346 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
347 #else
348     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
349 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
350 #endif
351   }
352   if (lu->options.PrintStat) {
353     ierr = PetscGetTime(&time);CHKERRQ(ierr);
354     time0 = time - time0;
355   }
356 
357   /* Factor the matrix. */
358   PStatInit(&stat);   /* Initialize the statistics variables. */
359 
360   if (lu->MatInputMode == GLOBAL) { /* global mat input */
361 #if defined(PETSC_USE_COMPLEX)
362     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
363                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
364 #else
365     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
366                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
367 #endif
368   } else { /* distributed mat input */
369 #if defined(PETSC_USE_COMPLEX)
370     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
371 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
372     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
373 #else
374     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
375 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
376     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
377 #endif
378   }
379 
380   if (lu->MatInputMode == GLOBAL && size > 1){
381     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
382   }
383 
384   if (lu->options.PrintStat) {
385     if (size > 1){
386       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
387       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
388       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
389       time = time/size; /* average time */
390       if (!rank)
391         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
392                               %g / %g / %g\n",time_max,time_min,time);
393     } else {
394       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
395                               %g\n",time0);
396     }
397 
398     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
399   }
400   PStatFree(&stat);
401 
402   F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
403   F_diag->assembled = PETSC_TRUE;
404   (*F)->assembled   = PETSC_TRUE;
405   lu->flg           = SAME_NONZERO_PATTERN;
406   PetscFunctionReturn(0);
407 }
408 
409 /* Note the Petsc r and c permutations are ignored */
410 #undef __FUNCT__
411 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
412 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
413 {
414   Mat               B;
415   Mat_SuperLU_DIST  *lu;
416   PetscErrorCode    ierr;
417   PetscInt          M=A->M,N=A->N,indx;
418   PetscMPIInt       size;
419   superlu_options_t options;
420   PetscTruth        flg;
421   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
422   const char        *prtype[] = {"LargeDiag","NATURAL"};
423 
424   PetscFunctionBegin;
425   /* Create the factorization matrix */
426   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
427   ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr);
428   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
429   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
430   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
431 
432   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
433   B->ops->solve            = MatSolve_SuperLU_DIST;
434   B->factor                = FACTOR_LU;
435 
436   lu = (Mat_SuperLU_DIST*)(B->spptr);
437 
438   /* Set the input options */
439   set_default_options_dist(&options);
440 
441   lu->MatInputMode = GLOBAL;
442   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
443 
444   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
445   lu->nprow = size/2;               /* Default process rows.      */
446   if (!lu->nprow) lu->nprow = 1;
447   lu->npcol = size/lu->nprow;           /* Default process columns.   */
448 
449   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
450 
451     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
452     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
453     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
454 
455     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
456     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
457 
458     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
459     if (!flg) {
460       options.Equil = NO;
461     }
462 
463     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
464     if (flg) {
465       switch (indx) {
466       case 0:
467         options.RowPerm = LargeDiag;
468         break;
469       case 1:
470         options.RowPerm = NOROWPERM;
471         break;
472       }
473     }
474 
475     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
476     if (flg) {
477       switch (indx) {
478       case 0:
479         options.ColPerm = MMD_AT_PLUS_A;
480         break;
481       case 1:
482         options.ColPerm = NATURAL;
483         break;
484       case 2:
485         options.ColPerm = MMD_ATA;
486         break;
487       }
488     }
489 
490     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
491     if (!flg) {
492       options.ReplaceTinyPivot = NO;
493     }
494 
495     options.IterRefine = NOREFINE;
496     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
497     if (flg) {
498       options.IterRefine = DOUBLE;
499     }
500 
501     if (PetscLogPrintInfo) {
502       options.PrintStat = YES;
503     } else {
504       options.PrintStat = NO;
505     }
506     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
507                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
508   PetscOptionsEnd();
509 
510   /* Initialize the SuperLU process grid. */
511   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
512 
513   /* Initialize ScalePermstruct and LUstruct. */
514   ScalePermstructInit(M, N, &lu->ScalePermstruct);
515   LUstructInit(M, N, &lu->LUstruct);
516 
517   lu->options            = options;
518   lu->flg                = DIFFERENT_NONZERO_PATTERN;
519   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
520   *F = B;
521 
522   PetscFunctionReturn(0);
523 }
524 
525 #undef __FUNCT__
526 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
527 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
528   PetscErrorCode   ierr;
529   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
530 
531   PetscFunctionBegin;
532   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
533   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
534   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
535   PetscFunctionReturn(0);
536 }
537 
538 #undef __FUNCT__
539 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
540 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
541 {
542   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
543   superlu_options_t options;
544   PetscErrorCode    ierr;
545 
546   PetscFunctionBegin;
547   /* check if matrix is superlu_dist type */
548   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
549 
550   options = lu->options;
551   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
552   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
553   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
554   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
555   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
556   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
557   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
558   if (options.ColPerm == NATURAL) {
559     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
560   } else if (options.ColPerm == MMD_AT_PLUS_A) {
561     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
562   } else if (options.ColPerm == MMD_ATA) {
563     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
564   } else {
565     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
566   }
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatView_SuperLU_DIST"
572 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
573 {
574   PetscErrorCode    ierr;
575   PetscTruth        iascii;
576   PetscViewerFormat format;
577   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
578 
579   PetscFunctionBegin;
580   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
581 
582   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
583   if (iascii) {
584     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
585     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
586       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
587     }
588   }
589   PetscFunctionReturn(0);
590 }
591 
592 
593 EXTERN_C_BEGIN
594 #undef __FUNCT__
595 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
596 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
597 {
598   /* This routine is only called to convert to MATSUPERLU_DIST */
599   /* from MATSEQAIJ if A has a single process communicator */
600   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
601   PetscErrorCode   ierr;
602   PetscMPIInt      size;
603   MPI_Comm         comm;
604   Mat              B=*newmat;
605   Mat_SuperLU_DIST *lu;
606 
607   PetscFunctionBegin;
608   if (reuse == MAT_INITIAL_MATRIX) {
609     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
610   }
611 
612   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
613   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
614 
615   lu->MatDuplicate         = A->ops->duplicate;
616   lu->MatView              = A->ops->view;
617   lu->MatAssemblyEnd       = A->ops->assemblyend;
618   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
619   lu->MatDestroy           = A->ops->destroy;
620   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
621 
622   B->spptr                 = (void*)lu;
623   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
624   B->ops->view             = MatView_SuperLU_DIST;
625   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
626   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
627   B->ops->destroy          = MatDestroy_SuperLU_DIST;
628   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
629   if (size == 1) {
630     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
631                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
632     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
633                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
634   } else {
635     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
636                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
637     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
638                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
639   }
640   ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr);
641   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
642   *newmat = B;
643   PetscFunctionReturn(0);
644 }
645 EXTERN_C_END
646 
647 #undef __FUNCT__
648 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
649 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
650   PetscErrorCode   ierr;
651   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
652 
653   PetscFunctionBegin;
654   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
655   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
656   PetscFunctionReturn(0);
657 }
658 
659 /*MC
660   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
661   via the external package SuperLU_DIST.
662 
663   If SuperLU_DIST is installed (see the manual for
664   instructions on how to declare the existence of external packages),
665   a matrix type can be constructed which invokes SuperLU_DIST solvers.
666   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
667 
668   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
669   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
670   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
671   for communicators controlling multiple processes.  It is recommended that you call both of
672   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
673   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
674   without data copy.
675 
676   Options Database Keys:
677 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
678 . -mat_superlu_dist_r <n> - number of rows in processor partition
679 . -mat_superlu_dist_c <n> - number of columns in processor partition
680 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
681 . -mat_superlu_dist_equil - equilibrate the matrix
682 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
683 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
684 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
685 . -mat_superlu_dist_iterrefine - use iterative refinement
686 - -mat_superlu_dist_statprint - print factorization information
687 
688    Level: beginner
689 
690 .seealso: PCLU
691 M*/
692 
693 EXTERN_C_BEGIN
694 #undef __FUNCT__
695 #define __FUNCT__ "MatCreate_SuperLU_DIST"
696 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
697 {
698   PetscErrorCode ierr;
699   PetscMPIInt    size;
700   Mat            A_diag;
701 
702   PetscFunctionBegin;
703   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
704   /*   and SuperLU_DIST types */
705   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
706   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
707   if (size == 1) {
708     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
709   } else {
710     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
711     A_diag = ((Mat_MPIAIJ *)A->data)->A;
712     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
713   }
714   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 EXTERN_C_END
718 
719