xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision ee45ca4afdde1af4d1deda7cd4dc1a4a63a3ea97)
1 /*
2         Provides an interface to the SuperLU_DIST_2.0 sparse solver
3 */
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
8 #include "stdlib.h"
9 #endif
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "superlu_zdefs.h"
14 #else
15 #include "superlu_ddefs.h"
16 #endif
17 EXTERN_C_END
18 
19 typedef enum { GLOBAL,DISTRIBUTED
20 } SuperLU_MatInputMode;
21 
22 typedef struct {
23   int_t                   nprow,npcol,*row,*col;
24   gridinfo_t              grid;
25   superlu_options_t       options;
26   SuperMatrix             A_sup;
27   ScalePermstruct_t       ScalePermstruct;
28   LUstruct_t              LUstruct;
29   int                     StatPrint;
30   int                     MatInputMode;
31   SOLVEstruct_t           SOLVEstruct;
32   MatStructure            flg;
33   MPI_Comm                comm_superlu;
34 #if defined(PETSC_USE_COMPLEX)
35   doublecomplex           *val;
36 #else
37   double                  *val;
38 #endif
39 
40   /* A few function pointers for inheritance */
41   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
42   PetscErrorCode (*MatView)(Mat,PetscViewer);
43   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
44   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
45   PetscErrorCode (*MatDestroy)(Mat);
46 
47   /* Flag to clean up (non-global) SuperLU objects during Destroy */
48   PetscTruth CleanUpSuperLU_Dist;
49 } Mat_SuperLU_DIST;
50 
51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
52 
53 EXTERN_C_BEGIN
54 #undef __FUNCT__
55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,Mat *newmat)
57 {
58   PetscErrorCode   ierr;
59   Mat              B=*newmat;
60   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
61 
62   PetscFunctionBegin;
63   if (B != A) {
64     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
65   }
66   /* Reset the original function pointers */
67   B->ops->duplicate        = lu->MatDuplicate;
68   B->ops->view             = lu->MatView;
69   B->ops->assemblyend      = lu->MatAssemblyEnd;
70   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
71   B->ops->destroy          = lu->MatDestroy;
72 
73   ierr = PetscFree(lu);CHKERRQ(ierr);
74 
75   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
76   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
79 
80   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
81   *newmat = B;
82   PetscFunctionReturn(0);
83 }
84 EXTERN_C_END
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
88 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
89 {
90   PetscErrorCode ierr;
91   int              size;
92   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
93 
94   PetscFunctionBegin;
95   if (lu->CleanUpSuperLU_Dist) {
96     /* Deallocate SuperLU_DIST storage */
97     if (lu->MatInputMode == GLOBAL) {
98       Destroy_CompCol_Matrix_dist(&lu->A_sup);
99     } else {
100       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
101       if ( lu->options.SolveInitialized ) {
102 #if defined(PETSC_USE_COMPLEX)
103         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
104 #else
105         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #endif
107       }
108     }
109     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
110     ScalePermstructFree(&lu->ScalePermstruct);
111     LUstructFree(&lu->LUstruct);
112 
113     /* Release the SuperLU_DIST process grid. */
114     superlu_gridexit(&lu->grid);
115 
116     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
117   }
118 
119   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
120   if (size == 1) {
121     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,&A);CHKERRQ(ierr);
122   } else {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,&A);CHKERRQ(ierr);
124   }
125   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
126 
127   PetscFunctionReturn(0);
128 }
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSolve_SuperLU_DIST"
132 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
133 {
134   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
135   PetscErrorCode ierr;
136   int              size;
137   int              m=A->M, N=A->N;
138   SuperLUStat_t    stat;
139   double           berr[1];
140   PetscScalar      *bptr;
141   int              info, nrhs=1;
142   Vec              x_seq;
143   IS               iden;
144   VecScatter       scat;
145 
146   PetscFunctionBegin;
147   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
148   if (size > 1) {
149     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
150       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
151       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
152       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
153       ierr = ISDestroy(iden);CHKERRQ(ierr);
154 
155       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
156       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
157       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
158     } else { /* distributed mat input */
159       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
160       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
161     }
162   } else { /* size == 1 */
163     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
164     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
165   }
166 
167   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
168 
169   PStatInit(&stat);        /* Initialize the statistics variables. */
170 
171   if (lu->MatInputMode == GLOBAL) {
172 #if defined(PETSC_USE_COMPLEX)
173     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
174                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
175 #else
176     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
177                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
178 #endif
179   } else { /* distributed mat input */
180 #if defined(PETSC_USE_COMPLEX)
181     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
182 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
183     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
184 #else
185     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
186 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
187     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
188 #endif
189   }
190   if (lu->StatPrint) {
191      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
192   }
193   PStatFree(&stat);
194 
195   if (size > 1) {
196     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
197       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
198       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
199       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
200       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
201       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
202     } else {
203       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
204     }
205   } else {
206     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
207   }
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
213 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
214 {
215   Mat              *tseq,A_seq = PETSC_NULL;
216   Mat_SeqAIJ       *aa,*bb;
217   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
218   PetscErrorCode ierr;
219   int              M=A->M,N=A->N,info,size,rank,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
220                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
221   SuperLUStat_t    stat;
222   double           *berr=0;
223   IS               isrow;
224   PetscLogDouble   time0,time,time_min,time_max;
225 #if defined(PETSC_USE_COMPLEX)
226   doublecomplex    *av, *bv;
227 #else
228   double           *av, *bv;
229 #endif
230 
231   PetscFunctionBegin;
232   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
233   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
234 
235   if (lu->StatPrint) { /* collect time for mat conversion */
236     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
237     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
238   }
239 
240   if (lu->MatInputMode == GLOBAL) { /* global mat input */
241     if (size > 1) { /* convert mpi A to seq mat A */
242       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
243       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
244       ierr = ISDestroy(isrow);CHKERRQ(ierr);
245 
246       A_seq = *tseq;
247       ierr = PetscFree(tseq);CHKERRQ(ierr);
248       aa =  (Mat_SeqAIJ*)A_seq->data;
249     } else {
250       aa =  (Mat_SeqAIJ*)A->data;
251     }
252 
253     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
254     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
255 #if defined(PETSC_USE_COMPLEX)
256       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
257 #else
258       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
259 #endif
260     } else { /* successive numeric factorization, sparsity pattern is reused. */
261       Destroy_CompCol_Matrix_dist(&lu->A_sup);
262       Destroy_LU(N, &lu->grid, &lu->LUstruct);
263       lu->options.Fact = SamePattern;
264     }
265 #if defined(PETSC_USE_COMPLEX)
266     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
267 #else
268     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
269 #endif
270 
271     /* Create compressed column matrix A_sup. */
272 #if defined(PETSC_USE_COMPLEX)
273     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
274 #else
275     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
276 #endif
277   } else { /* distributed mat input */
278     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
279     aa=(Mat_SeqAIJ*)(mat->A)->data;
280     bb=(Mat_SeqAIJ*)(mat->B)->data;
281     ai=aa->i; aj=aa->j;
282     bi=bb->i; bj=bb->j;
283 #if defined(PETSC_USE_COMPLEX)
284     av=(doublecomplex*)aa->a;
285     bv=(doublecomplex*)bb->a;
286 #else
287     av=aa->a;
288     bv=bb->a;
289 #endif
290     rstart = mat->rstart;
291     nz     = aa->nz + bb->nz;
292     garray = mat->garray;
293     rstart = mat->rstart;
294 
295     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
296 #if defined(PETSC_USE_COMPLEX)
297       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
298 #else
299       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
300 #endif
301     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
302       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
303       Destroy_LU(N, &lu->grid, &lu->LUstruct);
304       lu->options.Fact = SamePattern;
305     }
306     nz = 0; irow = mat->rstart;
307     for ( i=0; i<m; i++ ) {
308       lu->row[i] = nz;
309       countA = ai[i+1] - ai[i];
310       countB = bi[i+1] - bi[i];
311       ajj = aj + ai[i];  /* ptr to the beginning of this row */
312       bjj = bj + bi[i];
313 
314       /* B part, smaller col index */
315       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
316       jB = 0;
317       for (j=0; j<countB; j++){
318         jcol = garray[bjj[j]];
319         if (jcol > colA_start) {
320           jB = j;
321           break;
322         }
323         lu->col[nz] = jcol;
324         lu->val[nz++] = *bv++;
325         if (j==countB-1) jB = countB;
326       }
327 
328       /* A part */
329       for (j=0; j<countA; j++){
330         lu->col[nz] = mat->rstart + ajj[j];
331         lu->val[nz++] = *av++;
332       }
333 
334       /* B part, larger col index */
335       for (j=jB; j<countB; j++){
336         lu->col[nz] = garray[bjj[j]];
337         lu->val[nz++] = *bv++;
338       }
339     }
340     lu->row[m] = nz;
341 #if defined(PETSC_USE_COMPLEX)
342     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
343 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
344 #else
345     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
346 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
347 #endif
348   }
349   if (lu->StatPrint) {
350     ierr = PetscGetTime(&time);CHKERRQ(ierr);
351     time0 = time - time0;
352   }
353 
354   /* Factor the matrix. */
355   PStatInit(&stat);   /* Initialize the statistics variables. */
356 
357   if (lu->MatInputMode == GLOBAL) { /* global mat input */
358 #if defined(PETSC_USE_COMPLEX)
359     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
360                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
361 #else
362     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
363                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
364 #endif
365   } else { /* distributed mat input */
366 #if defined(PETSC_USE_COMPLEX)
367     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
368 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
369     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
370 #else
371     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
372 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
373     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
374 #endif
375   }
376 
377   if (lu->MatInputMode == GLOBAL && size > 1){
378     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
379   }
380 
381   if (lu->StatPrint) {
382     if (size > 1){
383       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
384       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
385       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
386       time = time/size; /* average time */
387       if (!rank)
388         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
389                               %g / %g / %g\n",time_max,time_min,time);
390     } else {
391       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
392                               %g\n",time0);
393     }
394 
395     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
396   }
397   PStatFree(&stat);
398   (*F)->assembled = PETSC_TRUE;
399   lu->flg         = SAME_NONZERO_PATTERN;
400   PetscFunctionReturn(0);
401 }
402 
403 /* Note the Petsc r and c permutations are ignored */
404 #undef __FUNCT__
405 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
406 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
407 {
408   Mat               B;
409   Mat_SuperLU_DIST  *lu;
410   PetscErrorCode ierr;
411   int               M=A->M,N=A->N,size,indx;
412   superlu_options_t options;
413   PetscTruth        flg;
414   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
415   const char        *prtype[] = {"LargeDiag","NATURAL"};
416 
417   PetscFunctionBegin;
418   /* Create the factorization matrix */
419   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
420   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
421   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
422   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
423 
424   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
425   B->ops->solve            = MatSolve_SuperLU_DIST;
426   B->factor                = FACTOR_LU;
427 
428   lu = (Mat_SuperLU_DIST*)(B->spptr);
429 
430   /* Set the input options */
431   set_default_options_dist(&options);
432   lu->MatInputMode = GLOBAL;
433   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
434 
435   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
436   lu->nprow = size/2;               /* Default process rows.      */
437   if (!lu->nprow) lu->nprow = 1;
438   lu->npcol = size/lu->nprow;           /* Default process columns.   */
439 
440   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
441 
442     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
443     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
444     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
445 
446     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
447     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
448 
449     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
450     if (!flg) {
451       options.Equil = NO;
452     }
453 
454     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
455     if (flg) {
456       switch (indx) {
457       case 0:
458         options.RowPerm = LargeDiag;
459         break;
460       case 1:
461         options.RowPerm = NOROWPERM;
462         break;
463       }
464     }
465 
466     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
467     if (flg) {
468       switch (indx) {
469       case 0:
470         options.ColPerm = MMD_AT_PLUS_A;
471         break;
472       case 1:
473         options.ColPerm = NATURAL;
474         break;
475       case 2:
476         options.ColPerm = MMD_ATA;
477         break;
478       case 3:
479         options.ColPerm = COLAMD;
480         break;
481       }
482     }
483 
484     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
485     if (!flg) {
486       options.ReplaceTinyPivot = NO;
487     }
488 
489     options.IterRefine = NOREFINE;
490     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
491     if (flg) {
492       options.IterRefine = DOUBLE;
493     }
494 
495     if (PetscLogPrintInfo) {
496       lu->StatPrint = (int)PETSC_TRUE;
497     } else {
498       lu->StatPrint = (int)PETSC_FALSE;
499     }
500     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
501                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
502   PetscOptionsEnd();
503 
504   /* Initialize the SuperLU process grid. */
505   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
506 
507   /* Initialize ScalePermstruct and LUstruct. */
508   ScalePermstructInit(M, N, &lu->ScalePermstruct);
509   LUstructInit(M, N, &lu->LUstruct);
510 
511   lu->options            = options;
512   lu->flg                = DIFFERENT_NONZERO_PATTERN;
513   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
514   *F = B;
515   PetscFunctionReturn(0);
516 }
517 
518 #undef __FUNCT__
519 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
520 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
521   PetscErrorCode ierr;
522   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
523 
524   PetscFunctionBegin;
525   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
526   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
527   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
528   PetscFunctionReturn(0);
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
533 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
534 {
535   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
536   superlu_options_t options;
537   PetscErrorCode ierr;
538   char              *colperm;
539 
540   PetscFunctionBegin;
541   /* check if matrix is superlu_dist type */
542   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
543 
544   options = lu->options;
545   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
546   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
547   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
548   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
549   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
550   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
551   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
552   if (options.ColPerm == NATURAL) {
553     colperm = "NATURAL";
554   } else if (options.ColPerm == MMD_AT_PLUS_A) {
555     colperm = "MMD_AT_PLUS_A";
556   } else if (options.ColPerm == MMD_ATA) {
557     colperm = "MMD_ATA";
558   } else if (options.ColPerm == COLAMD) {
559     colperm = "COLAMD";
560   } else {
561     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
562   }
563   ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation %s \n",colperm);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 #undef __FUNCT__
568 #define __FUNCT__ "MatView_SuperLU_DIST"
569 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
570 {
571   PetscErrorCode ierr;
572   PetscTruth        iascii;
573   PetscViewerFormat format;
574   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
575 
576   PetscFunctionBegin;
577   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
578 
579   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
580   if (iascii) {
581     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
582     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
583       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
584     }
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 
590 EXTERN_C_BEGIN
591 #undef __FUNCT__
592 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
593 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,Mat *newmat)
594 {
595   /* This routine is only called to convert to MATSUPERLU_DIST */
596   /* from MATSEQAIJ if A has a single process communicator */
597   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
598   PetscErrorCode   ierr;
599   PetscMPIInt      size;
600   MPI_Comm         comm;
601   Mat              B=*newmat;
602   Mat_SuperLU_DIST *lu;
603 
604   PetscFunctionBegin;
605   if (B != A) {
606     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
607   }
608 
609   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
610   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
611 
612   lu->MatDuplicate         = A->ops->duplicate;
613   lu->MatView              = A->ops->view;
614   lu->MatAssemblyEnd       = A->ops->assemblyend;
615   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
616   lu->MatDestroy           = A->ops->destroy;
617   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
618 
619   B->spptr                 = (void*)lu;
620   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
621   B->ops->view             = MatView_SuperLU_DIST;
622   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
623   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
624   B->ops->destroy          = MatDestroy_SuperLU_DIST;
625   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
626   if (size == 1) {
627     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
628                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
629     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
630                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
631   } else {
632     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
633                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
634     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
635                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
636   }
637   PetscLogInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.");
638   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
639   *newmat = B;
640   PetscFunctionReturn(0);
641 }
642 EXTERN_C_END
643 
644 #undef __FUNCT__
645 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
646 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
647   PetscErrorCode ierr;
648   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
649 
650   PetscFunctionBegin;
651   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
652   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 /*MC
657   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
658   via the external package SuperLU_DIST.
659 
660   If SuperLU_DIST is installed (see the manual for
661   instructions on how to declare the existence of external packages),
662   a matrix type can be constructed which invokes SuperLU_DIST solvers.
663   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
664 
665   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
666   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
667   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
668   for communicators controlling multiple processes.  It is recommended that you call both of
669   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
670   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
671   without data copy.
672 
673   Options Database Keys:
674 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
675 . -mat_superlu_dist_r <n> - number of rows in processor partition
676 . -mat_superlu_dist_c <n> - number of columns in processor partition
677 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
678 . -mat_superlu_dist_equil - equilibrate the matrix
679 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
680 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
681 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
682 . -mat_superlu_dist_iterrefine - use iterative refinement
683 - -mat_superlu_dist_statprint - print factorization information
684 
685    Level: beginner
686 
687 .seealso: PCLU
688 M*/
689 
690 EXTERN_C_BEGIN
691 #undef __FUNCT__
692 #define __FUNCT__ "MatCreate_SuperLU_DIST"
693 PetscErrorCode MatCreate_SuperLU_DIST(Mat A)
694 {
695   PetscErrorCode ierr;
696   int size;
697   Mat A_diag;
698 
699   PetscFunctionBegin;
700   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
701   /*   and SuperLU_DIST types */
702   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
703   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
704   if (size == 1) {
705     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
706   } else {
707     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
708     A_diag = ((Mat_MPIAIJ *)A->data)->A;
709     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,&A_diag);CHKERRQ(ierr);
710   }
711   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,&A);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 EXTERN_C_END
715 
716