xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision f37cbb0890cfcc075bf42ae2b4760d76e6f1168d)
1 /*
2         Provides an interface to the SuperLU_DIST_2.0 sparse solver
3 */
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
8 #include "stdlib.h"
9 #endif
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "superlu_zdefs.h"
14 #else
15 #include "superlu_ddefs.h"
16 #endif
17 EXTERN_C_END
18 
19 typedef enum { GLOBAL,DISTRIBUTED
20 } SuperLU_MatInputMode;
21 
22 typedef struct {
23   int_t                   nprow,npcol,*row,*col;
24   gridinfo_t              grid;
25   superlu_options_t       options;
26   SuperMatrix             A_sup;
27   ScalePermstruct_t       ScalePermstruct;
28   LUstruct_t              LUstruct;
29   int                     StatPrint;
30   int                     MatInputMode;
31   SOLVEstruct_t           SOLVEstruct;
32   MatStructure            flg;
33   MPI_Comm                comm_superlu;
34 #if defined(PETSC_USE_COMPLEX)
35   doublecomplex           *val;
36 #else
37   double                  *val;
38 #endif
39 
40   /* A few function pointers for inheritance */
41   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
42   PetscErrorCode (*MatView)(Mat,PetscViewer);
43   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
44   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
45   PetscErrorCode (*MatDestroy)(Mat);
46 
47   /* Flag to clean up (non-global) SuperLU objects during Destroy */
48   PetscTruth CleanUpSuperLU_Dist;
49 } Mat_SuperLU_DIST;
50 
51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
52 
53 EXTERN_C_BEGIN
54 #undef __FUNCT__
55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat) {
57   PetscErrorCode ierr;
58   Mat              B=*newmat;
59   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
60 
61   PetscFunctionBegin;
62   if (B != A) {
63     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
64   }
65   /* Reset the original function pointers */
66   B->ops->duplicate        = lu->MatDuplicate;
67   B->ops->view             = lu->MatView;
68   B->ops->assemblyend      = lu->MatAssemblyEnd;
69   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
70   B->ops->destroy          = lu->MatDestroy;
71 
72   ierr = PetscFree(lu);CHKERRQ(ierr);
73 
74   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
75   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
76   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
78 
79   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
80   *newmat = B;
81   PetscFunctionReturn(0);
82 }
83 EXTERN_C_END
84 
85 #undef __FUNCT__
86 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
87 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
88 {
89   PetscErrorCode ierr;
90   int              size;
91   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
92 
93   PetscFunctionBegin;
94   if (lu->CleanUpSuperLU_Dist) {
95     /* Deallocate SuperLU_DIST storage */
96     if (lu->MatInputMode == GLOBAL) {
97       Destroy_CompCol_Matrix_dist(&lu->A_sup);
98     } else {
99       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
100       if ( lu->options.SolveInitialized ) {
101 #if defined(PETSC_USE_COMPLEX)
102         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
103 #else
104         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
105 #endif
106       }
107     }
108     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
109     ScalePermstructFree(&lu->ScalePermstruct);
110     LUstructFree(&lu->LUstruct);
111 
112     /* Release the SuperLU_DIST process grid. */
113     superlu_gridexit(&lu->grid);
114 
115     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
116   }
117 
118   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
119   if (size == 1) {
120     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
121   } else {
122     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
123   }
124   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
125 
126   PetscFunctionReturn(0);
127 }
128 
129 #undef __FUNCT__
130 #define __FUNCT__ "MatSolve_SuperLU_DIST"
131 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
132 {
133   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
134   PetscErrorCode ierr;
135   int              size;
136   int              m=A->M, N=A->N;
137   SuperLUStat_t    stat;
138   double           berr[1];
139   PetscScalar      *bptr;
140   int              info, nrhs=1;
141   Vec              x_seq;
142   IS               iden;
143   VecScatter       scat;
144 
145   PetscFunctionBegin;
146   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
147   if (size > 1) {
148     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
149       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
150       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
151       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
152       ierr = ISDestroy(iden);CHKERRQ(ierr);
153 
154       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
155       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
156       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
157     } else { /* distributed mat input */
158       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
159       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
160     }
161   } else { /* size == 1 */
162     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
163     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
164   }
165 
166   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
167 
168   PStatInit(&stat);        /* Initialize the statistics variables. */
169 
170   if (lu->MatInputMode == GLOBAL) {
171 #if defined(PETSC_USE_COMPLEX)
172     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
173                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
174 #else
175     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
176                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
177 #endif
178   } else { /* distributed mat input */
179 #if defined(PETSC_USE_COMPLEX)
180     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
181 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
182     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
183 #else
184     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
185 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
186     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
187 #endif
188   }
189   if (lu->StatPrint) {
190      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
191   }
192   PStatFree(&stat);
193 
194   if (size > 1) {
195     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
196       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
197       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
198       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
199       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
200       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
201     } else {
202       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
203     }
204   } else {
205     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
206   }
207   PetscFunctionReturn(0);
208 }
209 
210 #undef __FUNCT__
211 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
212 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,Mat *F)
213 {
214   Mat              *tseq,A_seq = PETSC_NULL;
215   Mat_SeqAIJ       *aa,*bb;
216   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
217   PetscErrorCode ierr;
218   int              M=A->M,N=A->N,info,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
219                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
220   SuperLUStat_t    stat;
221   double           *berr=0;
222   IS               isrow;
223   PetscLogDouble   time0,time,time_min,time_max;
224 #if defined(PETSC_USE_COMPLEX)
225   doublecomplex    *av, *bv;
226 #else
227   double           *av, *bv;
228 #endif
229 
230   PetscFunctionBegin;
231   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
232   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
233 
234   if (lu->StatPrint) { /* collect time for mat conversion */
235     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
236     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
237   }
238 
239   if (lu->MatInputMode == GLOBAL) { /* global mat input */
240     if (size > 1) { /* convert mpi A to seq mat A */
241       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
242       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
243       ierr = ISDestroy(isrow);CHKERRQ(ierr);
244 
245       A_seq = *tseq;
246       ierr = PetscFree(tseq);CHKERRQ(ierr);
247       aa =  (Mat_SeqAIJ*)A_seq->data;
248     } else {
249       aa =  (Mat_SeqAIJ*)A->data;
250     }
251 
252     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
253     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
254 #if defined(PETSC_USE_COMPLEX)
255       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
256 #else
257       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
258 #endif
259     } else { /* successive numeric factorization, sparsity pattern is reused. */
260       Destroy_CompCol_Matrix_dist(&lu->A_sup);
261       Destroy_LU(N, &lu->grid, &lu->LUstruct);
262       lu->options.Fact = SamePattern;
263     }
264 #if defined(PETSC_USE_COMPLEX)
265     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
266 #else
267     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
268 #endif
269 
270     /* Create compressed column matrix A_sup. */
271 #if defined(PETSC_USE_COMPLEX)
272     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
273 #else
274     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
275 #endif
276   } else { /* distributed mat input */
277     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
278     aa=(Mat_SeqAIJ*)(mat->A)->data;
279     bb=(Mat_SeqAIJ*)(mat->B)->data;
280     ai=aa->i; aj=aa->j;
281     bi=bb->i; bj=bb->j;
282 #if defined(PETSC_USE_COMPLEX)
283     av=(doublecomplex*)aa->a;
284     bv=(doublecomplex*)bb->a;
285 #else
286     av=aa->a;
287     bv=bb->a;
288 #endif
289     rstart = mat->rstart;
290     nz     = aa->nz + bb->nz;
291     garray = mat->garray;
292     rstart = mat->rstart;
293 
294     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
295 #if defined(PETSC_USE_COMPLEX)
296       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
297 #else
298       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
299 #endif
300     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
301       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
302       Destroy_LU(N, &lu->grid, &lu->LUstruct);
303       lu->options.Fact = SamePattern;
304     }
305     nz = 0; irow = mat->rstart;
306     for ( i=0; i<m; i++ ) {
307       lu->row[i] = nz;
308       countA = ai[i+1] - ai[i];
309       countB = bi[i+1] - bi[i];
310       ajj = aj + ai[i];  /* ptr to the beginning of this row */
311       bjj = bj + bi[i];
312 
313       /* B part, smaller col index */
314       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
315       jB = 0;
316       for (j=0; j<countB; j++){
317         jcol = garray[bjj[j]];
318         if (jcol > colA_start) {
319           jB = j;
320           break;
321         }
322         lu->col[nz] = jcol;
323         lu->val[nz++] = *bv++;
324         if (j==countB-1) jB = countB;
325       }
326 
327       /* A part */
328       for (j=0; j<countA; j++){
329         lu->col[nz] = mat->rstart + ajj[j];
330         lu->val[nz++] = *av++;
331       }
332 
333       /* B part, larger col index */
334       for (j=jB; j<countB; j++){
335         lu->col[nz] = garray[bjj[j]];
336         lu->val[nz++] = *bv++;
337       }
338     }
339     lu->row[m] = nz;
340 #if defined(PETSC_USE_COMPLEX)
341     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
342 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
343 #else
344     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
345 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
346 #endif
347   }
348   if (lu->StatPrint) {
349     ierr = PetscGetTime(&time);CHKERRQ(ierr);
350     time0 = time - time0;
351   }
352 
353   /* Factor the matrix. */
354   PStatInit(&stat);   /* Initialize the statistics variables. */
355 
356   if (lu->MatInputMode == GLOBAL) { /* global mat input */
357 #if defined(PETSC_USE_COMPLEX)
358     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
359                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
360 #else
361     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
362                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
363 #endif
364   } else { /* distributed mat input */
365 #if defined(PETSC_USE_COMPLEX)
366     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
367 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
368     if (info) SETERRQ1(1,"pzgssvx fails, info: %d\n",info);
369 #else
370     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
371 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
372     if (info) SETERRQ1(1,"pdgssvx fails, info: %d\n",info);
373 #endif
374   }
375 
376   if (lu->MatInputMode == GLOBAL && size > 1){
377     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
378   }
379 
380   if (lu->StatPrint) {
381     if (size > 1){
382       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
383       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
384       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
385       time = time/size; /* average time */
386       if (!rank)
387         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
388                               %g / %g / %g\n",time_max,time_min,time);
389     } else {
390       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
391                               %g\n",time0);
392     }
393 
394     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
395   }
396   PStatFree(&stat);
397   (*F)->assembled = PETSC_TRUE;
398   lu->flg         = SAME_NONZERO_PATTERN;
399   PetscFunctionReturn(0);
400 }
401 
402 /* Note the Petsc r and c permutations are ignored */
403 #undef __FUNCT__
404 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
405 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
406 {
407   Mat               B;
408   Mat_SuperLU_DIST  *lu;
409   PetscErrorCode ierr;
410   int               M=A->M,N=A->N,size,indx;
411   superlu_options_t options;
412   PetscTruth        flg;
413   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
414   const char        *prtype[] = {"LargeDiag","NATURAL"};
415 
416   PetscFunctionBegin;
417   /* Create the factorization matrix */
418   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
419   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
420   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
421   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
422 
423   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
424   B->ops->solve            = MatSolve_SuperLU_DIST;
425   B->factor                = FACTOR_LU;
426 
427   lu = (Mat_SuperLU_DIST*)(B->spptr);
428 
429   /* Set the input options */
430   set_default_options_dist(&options);
431   lu->MatInputMode = GLOBAL;
432   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
433 
434   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
435   lu->nprow = size/2;               /* Default process rows.      */
436   if (!lu->nprow) lu->nprow = 1;
437   lu->npcol = size/lu->nprow;           /* Default process columns.   */
438 
439   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
440 
441     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
442     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
443     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
444 
445     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
446     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
447 
448     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
449     if (!flg) {
450       options.Equil = NO;
451     }
452 
453     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
454     if (flg) {
455       switch (indx) {
456       case 0:
457         options.RowPerm = LargeDiag;
458         break;
459       case 1:
460         options.RowPerm = NOROWPERM;
461         break;
462       }
463     }
464 
465     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
466     if (flg) {
467       switch (indx) {
468       case 0:
469         options.ColPerm = MMD_AT_PLUS_A;
470         break;
471       case 1:
472         options.ColPerm = NATURAL;
473         break;
474       case 2:
475         options.ColPerm = MMD_ATA;
476         break;
477       case 3:
478         options.ColPerm = COLAMD;
479         break;
480       }
481     }
482 
483     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
484     if (!flg) {
485       options.ReplaceTinyPivot = NO;
486     }
487 
488     options.IterRefine = NOREFINE;
489     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
490     if (flg) {
491       options.IterRefine = DOUBLE;
492     }
493 
494     if (PetscLogPrintInfo) {
495       lu->StatPrint = (int)PETSC_TRUE;
496     } else {
497       lu->StatPrint = (int)PETSC_FALSE;
498     }
499     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
500                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
501   PetscOptionsEnd();
502 
503   /* Initialize the SuperLU process grid. */
504   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
505 
506   /* Initialize ScalePermstruct and LUstruct. */
507   ScalePermstructInit(M, N, &lu->ScalePermstruct);
508   LUstructInit(M, N, &lu->LUstruct);
509 
510   lu->options            = options;
511   lu->flg                = DIFFERENT_NONZERO_PATTERN;
512   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
513   *F = B;
514   PetscFunctionReturn(0);
515 }
516 
517 #undef __FUNCT__
518 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
519 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
520   PetscErrorCode ierr;
521   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
522 
523   PetscFunctionBegin;
524   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
525   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
526   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
527   PetscFunctionReturn(0);
528 }
529 
530 #undef __FUNCT__
531 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
532 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
533 {
534   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
535   superlu_options_t options;
536   PetscErrorCode ierr;
537   char              *colperm;
538 
539   PetscFunctionBegin;
540   /* check if matrix is superlu_dist type */
541   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
542 
543   options = lu->options;
544   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
545   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
546   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
547   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
548   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
549   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
550   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
551   if (options.ColPerm == NATURAL) {
552     colperm = "NATURAL";
553   } else if (options.ColPerm == MMD_AT_PLUS_A) {
554     colperm = "MMD_AT_PLUS_A";
555   } else if (options.ColPerm == MMD_ATA) {
556     colperm = "MMD_ATA";
557   } else if (options.ColPerm == COLAMD) {
558     colperm = "COLAMD";
559   } else {
560     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
561   }
562   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatView_SuperLU_DIST"
568 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
569 {
570   PetscErrorCode ierr;
571   PetscTruth        iascii;
572   PetscViewerFormat format;
573   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
574 
575   PetscFunctionBegin;
576   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
577 
578   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
579   if (iascii) {
580     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
581     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
582       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
583     }
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 
589 EXTERN_C_BEGIN
590 #undef __FUNCT__
591 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
592 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat) {
593   /* This routine is only called to convert to MATSUPERLU_DIST */
594   /* from MATSEQAIJ if A has a single process communicator */
595   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
596   PetscErrorCode ierr;
597   int              size;
598   MPI_Comm         comm;
599   Mat              B=*newmat;
600   Mat_SuperLU_DIST *lu;
601 
602   PetscFunctionBegin;
603   if (B != A) {
604     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
605   }
606 
607   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
608   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
609 
610   lu->MatDuplicate         = A->ops->duplicate;
611   lu->MatView              = A->ops->view;
612   lu->MatAssemblyEnd       = A->ops->assemblyend;
613   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
614   lu->MatDestroy           = A->ops->destroy;
615   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
616 
617   B->spptr                 = (void*)lu;
618   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
619   B->ops->view             = MatView_SuperLU_DIST;
620   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
621   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
622   B->ops->destroy          = MatDestroy_SuperLU_DIST;
623   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
624   if (size == 1) {
625     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
626                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
627     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
628                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
629   } else {
630     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
631                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
632     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
633                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
634   }
635   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
636   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
637   *newmat = B;
638   PetscFunctionReturn(0);
639 }
640 EXTERN_C_END
641 
642 #undef __FUNCT__
643 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
644 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
645   PetscErrorCode ierr;
646   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
647 
648   PetscFunctionBegin;
649   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
650   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 /*MC
655   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
656   via the external package SuperLU_DIST.
657 
658   If SuperLU_DIST is installed (see the manual for
659   instructions on how to declare the existence of external packages),
660   a matrix type can be constructed which invokes SuperLU_DIST solvers.
661   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
662   This matrix type is only supported for double precision real.
663 
664   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
665   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
666   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
667   for communicators controlling multiple processes.  It is recommended that you call both of
668   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
669   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
670   without data copy.
671 
672   Options Database Keys:
673 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
674 . -mat_superlu_dist_r <n> - number of rows in processor partition
675 . -mat_superlu_dist_c <n> - number of columns in processor partition
676 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
677 . -mat_superlu_dist_equil - equilibrate the matrix
678 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
679 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
680 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
681 . -mat_superlu_dist_iterrefine - use iterative refinement
682 - -mat_superlu_dist_statprint - print factorization information
683 
684    Level: beginner
685 
686 .seealso: PCLU
687 M*/
688 
689 EXTERN_C_BEGIN
690 #undef __FUNCT__
691 #define __FUNCT__ "MatCreate_SuperLU_DIST"
692 PetscErrorCode MatCreate_SuperLU_DIST(Mat A)
693 {
694   PetscErrorCode ierr;
695   int size;
696   Mat A_diag;
697 
698   PetscFunctionBegin;
699   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
700   /*   and SuperLU_DIST types */
701   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
702   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
703   if (size == 1) {
704     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
705   } else {
706     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
707     A_diag = ((Mat_MPIAIJ *)A->data)->A;
708     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr);
709   }
710   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 EXTERN_C_END
714 
715