xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision f259bd47db2537c0c1948ca59840a8a8f5c67e64)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   MatStructure            flg;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
79   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
80   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
81 
82   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
83   *newmat = B;
84   PetscFunctionReturn(0);
85 }
86 EXTERN_C_END
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
91 {
92   PetscErrorCode   ierr;
93   PetscMPIInt      size;
94   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
95 
96   PetscFunctionBegin;
97   if (lu->CleanUpSuperLU_Dist) {
98     /* Deallocate SuperLU_DIST storage */
99     if (lu->MatInputMode == GLOBAL) {
100       Destroy_CompCol_Matrix_dist(&lu->A_sup);
101     } else {
102       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
103       if ( lu->options.SolveInitialized ) {
104 #if defined(PETSC_USE_COMPLEX)
105         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #else
107         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
108 #endif
109       }
110     }
111     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
112     ScalePermstructFree(&lu->ScalePermstruct);
113     LUstructFree(&lu->LUstruct);
114 
115     /* Release the SuperLU_DIST process grid. */
116     superlu_gridexit(&lu->grid);
117 
118     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
119   }
120 
121   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
122   if (size == 1) {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   } else {
125     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
126   }
127   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
128 
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSolve_SuperLU_DIST"
134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
135 {
136   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
137   PetscErrorCode   ierr;
138   PetscMPIInt      size;
139   PetscInt         m=A->M, N=A->N;
140   SuperLUStat_t    stat;
141   double           berr[1];
142   PetscScalar      *bptr;
143   PetscInt         info, nrhs=1;
144   Vec              x_seq;
145   IS               iden;
146   VecScatter       scat;
147 
148   PetscFunctionBegin;
149   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
150   if (size > 1) {
151     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
152       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
153       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
154       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
155       ierr = ISDestroy(iden);CHKERRQ(ierr);
156 
157       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
158       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
159       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
160     } else { /* distributed mat input */
161       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
162       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
163     }
164   } else { /* size == 1 */
165     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
166     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
167   }
168 
169   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
170 
171   PStatInit(&stat);        /* Initialize the statistics variables. */
172   if (lu->MatInputMode == GLOBAL) {
173 #if defined(PETSC_USE_COMPLEX)
174     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
175                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
176 #else
177     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
178                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
179 #endif
180   } else { /* distributed mat input */
181 #if defined(PETSC_USE_COMPLEX)
182     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
183 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
184     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
185 #else
186     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
187 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
188     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
189 #endif
190   }
191   if (lu->options.PrintStat) {
192      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
193   }
194   PStatFree(&stat);
195 
196   if (size > 1) {
197     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
198       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
199       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
200       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
201       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
202       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
203     } else {
204       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
205     }
206   } else {
207     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
208   }
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
215 {
216   Mat              *tseq,A_seq = PETSC_NULL;
217   Mat_SeqAIJ       *aa,*bb;
218   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
219   PetscErrorCode   ierr;
220   PetscInt         M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
221                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
222   PetscMPIInt      size,rank;
223   SuperLUStat_t    stat;
224   double           *berr=0;
225   IS               isrow;
226   PetscLogDouble   time0,time,time_min,time_max;
227 #if defined(PETSC_USE_COMPLEX)
228   doublecomplex    *av, *bv;
229 #else
230   double           *av, *bv;
231 #endif
232 
233   PetscFunctionBegin;
234   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
235   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
236 
237   if (lu->options.PrintStat) { /* collect time for mat conversion */
238     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
239     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
240   }
241 
242   if (lu->MatInputMode == GLOBAL) { /* global mat input */
243     if (size > 1) { /* convert mpi A to seq mat A */
244       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
245       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
246       ierr = ISDestroy(isrow);CHKERRQ(ierr);
247 
248       A_seq = *tseq;
249       ierr = PetscFree(tseq);CHKERRQ(ierr);
250       aa =  (Mat_SeqAIJ*)A_seq->data;
251     } else {
252       aa =  (Mat_SeqAIJ*)A->data;
253     }
254 
255     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
256     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
257 #if defined(PETSC_USE_COMPLEX)
258       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
259 #else
260       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
261 #endif
262     } else { /* successive numeric factorization, sparsity pattern is reused. */
263       Destroy_CompCol_Matrix_dist(&lu->A_sup);
264       Destroy_LU(N, &lu->grid, &lu->LUstruct);
265       lu->options.Fact = SamePattern;
266     }
267 #if defined(PETSC_USE_COMPLEX)
268     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
269 #else
270     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
271 #endif
272 
273     /* Create compressed column matrix A_sup. */
274 #if defined(PETSC_USE_COMPLEX)
275     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
276 #else
277     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
278 #endif
279   } else { /* distributed mat input */
280     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
281     aa=(Mat_SeqAIJ*)(mat->A)->data;
282     bb=(Mat_SeqAIJ*)(mat->B)->data;
283     ai=aa->i; aj=aa->j;
284     bi=bb->i; bj=bb->j;
285 #if defined(PETSC_USE_COMPLEX)
286     av=(doublecomplex*)aa->a;
287     bv=(doublecomplex*)bb->a;
288 #else
289     av=aa->a;
290     bv=bb->a;
291 #endif
292     rstart = mat->rstart;
293     nz     = aa->nz + bb->nz;
294     garray = mat->garray;
295     rstart = mat->rstart;
296 
297     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
298 #if defined(PETSC_USE_COMPLEX)
299       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
300 #else
301       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
302 #endif
303     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
304       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
305       Destroy_LU(N, &lu->grid, &lu->LUstruct);
306       lu->options.Fact = SamePattern;
307     }
308     nz = 0; irow = mat->rstart;
309     for ( i=0; i<m; i++ ) {
310       lu->row[i] = nz;
311       countA = ai[i+1] - ai[i];
312       countB = bi[i+1] - bi[i];
313       ajj = aj + ai[i];  /* ptr to the beginning of this row */
314       bjj = bj + bi[i];
315 
316       /* B part, smaller col index */
317       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
318       jB = 0;
319       for (j=0; j<countB; j++){
320         jcol = garray[bjj[j]];
321         if (jcol > colA_start) {
322           jB = j;
323           break;
324         }
325         lu->col[nz] = jcol;
326         lu->val[nz++] = *bv++;
327         if (j==countB-1) jB = countB;
328       }
329 
330       /* A part */
331       for (j=0; j<countA; j++){
332         lu->col[nz] = mat->rstart + ajj[j];
333         lu->val[nz++] = *av++;
334       }
335 
336       /* B part, larger col index */
337       for (j=jB; j<countB; j++){
338         lu->col[nz] = garray[bjj[j]];
339         lu->val[nz++] = *bv++;
340       }
341     }
342     lu->row[m] = nz;
343 #if defined(PETSC_USE_COMPLEX)
344     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
345 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
346 #else
347     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
348 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
349 #endif
350   }
351   if (lu->options.PrintStat) {
352     ierr = PetscGetTime(&time);CHKERRQ(ierr);
353     time0 = time - time0;
354   }
355 
356   /* Factor the matrix. */
357   PStatInit(&stat);   /* Initialize the statistics variables. */
358 
359   if (lu->MatInputMode == GLOBAL) { /* global mat input */
360 #if defined(PETSC_USE_COMPLEX)
361     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
362                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
363 #else
364     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
365                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
366 #endif
367   } else { /* distributed mat input */
368 #if defined(PETSC_USE_COMPLEX)
369     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
370 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
371     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
372 #else
373     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
374 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
375     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
376 #endif
377   }
378 
379   if (lu->MatInputMode == GLOBAL && size > 1){
380     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
381   }
382 
383   if (lu->options.PrintStat) {
384     if (size > 1){
385       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
386       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
387       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
388       time = time/size; /* average time */
389       if (!rank)
390         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
391                               %g / %g / %g\n",time_max,time_min,time);
392     } else {
393       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
394                               %g\n",time0);
395     }
396 
397     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
398   }
399   PStatFree(&stat);
400   (*F)->assembled = PETSC_TRUE;
401   lu->flg         = SAME_NONZERO_PATTERN;
402   PetscFunctionReturn(0);
403 }
404 
405 /* Note the Petsc r and c permutations are ignored */
406 #undef __FUNCT__
407 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
408 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
409 {
410   Mat               B;
411   Mat_SuperLU_DIST  *lu;
412   PetscErrorCode    ierr;
413   PetscInt          M=A->M,N=A->N,indx;
414   PetscMPIInt       size;
415   superlu_options_t options;
416   PetscTruth        flg;
417   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
418   const char        *prtype[] = {"LargeDiag","NATURAL"};
419 
420   PetscFunctionBegin;
421   /* Create the factorization matrix */
422   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
423   ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr);
424   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
425   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
426   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
427 
428   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
429   B->ops->solve            = MatSolve_SuperLU_DIST;
430   B->factor                = FACTOR_LU;
431 
432   lu = (Mat_SuperLU_DIST*)(B->spptr);
433 
434   /* Set the input options */
435   set_default_options_dist(&options);
436 
437   lu->MatInputMode = GLOBAL;
438   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
439 
440   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
441   lu->nprow = size/2;               /* Default process rows.      */
442   if (!lu->nprow) lu->nprow = 1;
443   lu->npcol = size/lu->nprow;           /* Default process columns.   */
444 
445   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
446 
447     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
448     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
449     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
450 
451     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
452     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
453 
454     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
455     if (!flg) {
456       options.Equil = NO;
457     }
458 
459     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
460     if (flg) {
461       switch (indx) {
462       case 0:
463         options.RowPerm = LargeDiag;
464         break;
465       case 1:
466         options.RowPerm = NOROWPERM;
467         break;
468       }
469     }
470 
471     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
472     if (flg) {
473       switch (indx) {
474       case 0:
475         options.ColPerm = MMD_AT_PLUS_A;
476         break;
477       case 1:
478         options.ColPerm = NATURAL;
479         break;
480       case 2:
481         options.ColPerm = MMD_ATA;
482         break;
483       }
484     }
485 
486     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
487     if (!flg) {
488       options.ReplaceTinyPivot = NO;
489     }
490 
491     options.IterRefine = NOREFINE;
492     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
493     if (flg) {
494       options.IterRefine = DOUBLE;
495     }
496 
497     if (PetscLogPrintInfo) {
498       options.PrintStat = YES;
499     } else {
500       options.PrintStat = NO;
501     }
502     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
503                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
504   PetscOptionsEnd();
505 
506   /* Initialize the SuperLU process grid. */
507   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
508 
509   /* Initialize ScalePermstruct and LUstruct. */
510   ScalePermstructInit(M, N, &lu->ScalePermstruct);
511   LUstructInit(M, N, &lu->LUstruct);
512 
513   lu->options            = options;
514   lu->flg                = DIFFERENT_NONZERO_PATTERN;
515   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
516   *F = B;
517 
518   PetscFunctionReturn(0);
519 }
520 
521 #undef __FUNCT__
522 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
523 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
524   PetscErrorCode   ierr;
525   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
526 
527   PetscFunctionBegin;
528   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
529   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
530   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
531   PetscFunctionReturn(0);
532 }
533 
534 #undef __FUNCT__
535 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
536 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
537 {
538   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
539   superlu_options_t options;
540   PetscErrorCode    ierr;
541 
542   PetscFunctionBegin;
543   /* check if matrix is superlu_dist type */
544   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
545 
546   options = lu->options;
547   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
548   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
549   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
550   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
551   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
552   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
553   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
554   if (options.ColPerm == NATURAL) {
555     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
556   } else if (options.ColPerm == MMD_AT_PLUS_A) {
557     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
558   } else if (options.ColPerm == MMD_ATA) {
559     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
560   } else {
561     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
562   }
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatView_SuperLU_DIST"
568 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
569 {
570   PetscErrorCode    ierr;
571   PetscTruth        iascii;
572   PetscViewerFormat format;
573   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
574 
575   PetscFunctionBegin;
576   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
577 
578   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
579   if (iascii) {
580     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
581     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
582       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
583     }
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 
589 EXTERN_C_BEGIN
590 #undef __FUNCT__
591 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
592 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
593 {
594   /* This routine is only called to convert to MATSUPERLU_DIST */
595   /* from MATSEQAIJ if A has a single process communicator */
596   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
597   PetscErrorCode   ierr;
598   PetscMPIInt      size;
599   MPI_Comm         comm;
600   Mat              B=*newmat;
601   Mat_SuperLU_DIST *lu;
602 
603   PetscFunctionBegin;
604   if (reuse == MAT_INITIAL_MATRIX) {
605     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
606   }
607 
608   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
609   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
610 
611   lu->MatDuplicate         = A->ops->duplicate;
612   lu->MatView              = A->ops->view;
613   lu->MatAssemblyEnd       = A->ops->assemblyend;
614   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
615   lu->MatDestroy           = A->ops->destroy;
616   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
617 
618   B->spptr                 = (void*)lu;
619   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
620   B->ops->view             = MatView_SuperLU_DIST;
621   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
622   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
623   B->ops->destroy          = MatDestroy_SuperLU_DIST;
624   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
625   if (size == 1) {
626     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
627                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
628     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
629                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
630   } else {
631     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
632                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
633     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
634                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
635   }
636   ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr);
637   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
638   *newmat = B;
639   PetscFunctionReturn(0);
640 }
641 EXTERN_C_END
642 
643 #undef __FUNCT__
644 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
645 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
646   PetscErrorCode   ierr;
647   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
648 
649   PetscFunctionBegin;
650   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
651   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 
655 /*MC
656   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
657   via the external package SuperLU_DIST.
658 
659   If SuperLU_DIST is installed (see the manual for
660   instructions on how to declare the existence of external packages),
661   a matrix type can be constructed which invokes SuperLU_DIST solvers.
662   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
663 
664   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
665   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
666   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
667   for communicators controlling multiple processes.  It is recommended that you call both of
668   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
669   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
670   without data copy.
671 
672   Options Database Keys:
673 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
674 . -mat_superlu_dist_r <n> - number of rows in processor partition
675 . -mat_superlu_dist_c <n> - number of columns in processor partition
676 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
677 . -mat_superlu_dist_equil - equilibrate the matrix
678 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
679 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
680 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
681 . -mat_superlu_dist_iterrefine - use iterative refinement
682 - -mat_superlu_dist_statprint - print factorization information
683 
684    Level: beginner
685 
686 .seealso: PCLU
687 M*/
688 
689 EXTERN_C_BEGIN
690 #undef __FUNCT__
691 #define __FUNCT__ "MatCreate_SuperLU_DIST"
692 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
693 {
694   PetscErrorCode ierr;
695   PetscMPIInt    size;
696   Mat            A_diag;
697 
698   PetscFunctionBegin;
699   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
700   /*   and SuperLU_DIST types */
701   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
702   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
703   if (size == 1) {
704     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
705   } else {
706     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
707     A_diag = ((Mat_MPIAIJ *)A->data)->A;
708     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
709   }
710   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
711   PetscFunctionReturn(0);
712 }
713 EXTERN_C_END
714 
715