xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 2a6744eb01855f5aa328eb8fdf4b0d01e72ad151)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   fact_t                  FactPattern;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
76   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
79 
80   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
81   *newmat = B;
82   PetscFunctionReturn(0);
83 }
84 EXTERN_C_END
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
88 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
89 {
90   PetscErrorCode   ierr;
91   PetscMPIInt      size;
92   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
93 
94   PetscFunctionBegin;
95   if (lu->CleanUpSuperLU_Dist) {
96     /* Deallocate SuperLU_DIST storage */
97     if (lu->MatInputMode == GLOBAL) {
98       Destroy_CompCol_Matrix_dist(&lu->A_sup);
99     } else {
100       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
101       if ( lu->options.SolveInitialized ) {
102 #if defined(PETSC_USE_COMPLEX)
103         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
104 #else
105         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #endif
107       }
108     }
109     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
110     ScalePermstructFree(&lu->ScalePermstruct);
111     LUstructFree(&lu->LUstruct);
112 
113     /* Release the SuperLU_DIST process grid. */
114     superlu_gridexit(&lu->grid);
115 
116     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
117   }
118 
119   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
120   if (size == 1) {
121     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
122   } else {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   }
125   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 #undef __FUNCT__
130 #define __FUNCT__ "MatSolve_SuperLU_DIST"
131 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
132 {
133   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
134   PetscErrorCode   ierr;
135   PetscMPIInt      size;
136   PetscInt         m=A->rmap.N, N=A->cmap.N;
137   SuperLUStat_t    stat;
138   double           berr[1];
139   PetscScalar      *bptr;
140   PetscInt         info, nrhs=1;
141   Vec              x_seq;
142   IS               iden;
143   VecScatter       scat;
144 
145   PetscFunctionBegin;
146   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
147   if (size > 1) {
148     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
149       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
150       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
151       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
152       ierr = ISDestroy(iden);CHKERRQ(ierr);
153 
154       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
155       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
156       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
157     } else { /* distributed mat input */
158       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
159       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
160     }
161   } else { /* size == 1 */
162     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
163     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
164   }
165 
166   if (lu->options.Fact != FACTORED)
167     SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
168 
169   PStatInit(&stat);        /* Initialize the statistics variables. */
170   if (lu->MatInputMode == GLOBAL) {
171 #if defined(PETSC_USE_COMPLEX)
172     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
173                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
174 #else
175     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
176                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
177 #endif
178   } else { /* distributed mat input */
179 #if defined(PETSC_USE_COMPLEX)
180     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
181 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
182     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
183 #else
184     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
185 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
186     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
187 #endif
188   }
189   if (lu->options.PrintStat) {
190      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
191   }
192   PStatFree(&stat);
193 
194   if (size > 1) {
195     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
196       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
197       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
198       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
199       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
200       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
201     } else {
202       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
203     }
204   } else {
205     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
206   }
207   PetscFunctionReturn(0);
208 }
209 
210 #undef __FUNCT__
211 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
212 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
213 {
214   Mat              *tseq,A_seq = PETSC_NULL;
215   Mat_SeqAIJ       *aa,*bb;
216   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
217   PetscErrorCode   ierr;
218   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
219                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
220   PetscMPIInt      size,rank;
221   SuperLUStat_t    stat;
222   double           *berr=0;
223   IS               isrow;
224   PetscLogDouble   time0,time,time_min,time_max;
225   Mat              F_diag=PETSC_NULL;
226 #if defined(PETSC_USE_COMPLEX)
227   doublecomplex    *av, *bv;
228 #else
229   double           *av, *bv;
230 #endif
231 
232   PetscFunctionBegin;
233   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
234   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
235 
236   if (lu->options.PrintStat) { /* collect time for mat conversion */
237     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
238     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
239   }
240 
241   if (lu->MatInputMode == GLOBAL) { /* global mat input */
242     if (size > 1) { /* convert mpi A to seq mat A */
243       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
244       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
245       ierr = ISDestroy(isrow);CHKERRQ(ierr);
246 
247       A_seq = *tseq;
248       ierr = PetscFree(tseq);CHKERRQ(ierr);
249       aa =  (Mat_SeqAIJ*)A_seq->data;
250     } else {
251       aa =  (Mat_SeqAIJ*)A->data;
252     }
253 
254     /* Convert Petsc NR matrix to SuperLU_DIST NC.
255        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
256     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
257       if (lu->FactPattern == SamePattern_SameRowPerm){
258         Destroy_CompCol_Matrix_dist(&lu->A_sup);
259         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
260         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
261       } else {
262         Destroy_CompCol_Matrix_dist(&lu->A_sup);
263         Destroy_LU(N, &lu->grid, &lu->LUstruct);
264         lu->options.Fact = SamePattern;
265       }
266     }
267 #if defined(PETSC_USE_COMPLEX)
268     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
269 #else
270     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
271 #endif
272 
273     /* Create compressed column matrix A_sup. */
274 #if defined(PETSC_USE_COMPLEX)
275     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
276 #else
277     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
278 #endif
279   } else { /* distributed mat input */
280     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
281     aa=(Mat_SeqAIJ*)(mat->A)->data;
282     bb=(Mat_SeqAIJ*)(mat->B)->data;
283     ai=aa->i; aj=aa->j;
284     bi=bb->i; bj=bb->j;
285 #if defined(PETSC_USE_COMPLEX)
286     av=(doublecomplex*)aa->a;
287     bv=(doublecomplex*)bb->a;
288 #else
289     av=aa->a;
290     bv=bb->a;
291 #endif
292     rstart = A->rmap.rstart;
293     nz     = aa->nz + bb->nz;
294     garray = mat->garray;
295 
296     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
297 #if defined(PETSC_USE_COMPLEX)
298       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
299 #else
300       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #endif
302     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
303       if (lu->FactPattern == SamePattern_SameRowPerm){
304         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
305         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
306       } else {
307         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
308         lu->options.Fact = SamePattern;
309       }
310     }
311     nz = 0; irow = rstart;
312     for ( i=0; i<m; i++ ) {
313       lu->row[i] = nz;
314       countA = ai[i+1] - ai[i];
315       countB = bi[i+1] - bi[i];
316       ajj = aj + ai[i];  /* ptr to the beginning of this row */
317       bjj = bj + bi[i];
318 
319       /* B part, smaller col index */
320       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
321       jB = 0;
322       for (j=0; j<countB; j++){
323         jcol = garray[bjj[j]];
324         if (jcol > colA_start) {
325           jB = j;
326           break;
327         }
328         lu->col[nz] = jcol;
329         lu->val[nz++] = *bv++;
330         if (j==countB-1) jB = countB;
331       }
332 
333       /* A part */
334       for (j=0; j<countA; j++){
335         lu->col[nz] = rstart + ajj[j];
336         lu->val[nz++] = *av++;
337       }
338 
339       /* B part, larger col index */
340       for (j=jB; j<countB; j++){
341         lu->col[nz] = garray[bjj[j]];
342         lu->val[nz++] = *bv++;
343       }
344     }
345     lu->row[m] = nz;
346 #if defined(PETSC_USE_COMPLEX)
347     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
348 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
349 #else
350     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
351 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
352 #endif
353   }
354   if (lu->options.PrintStat) {
355     ierr = PetscGetTime(&time);CHKERRQ(ierr);
356     time0 = time - time0;
357   }
358 
359   /* Factor the matrix. */
360   PStatInit(&stat);   /* Initialize the statistics variables. */
361 
362   if (lu->MatInputMode == GLOBAL) { /* global mat input */
363 #if defined(PETSC_USE_COMPLEX)
364     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
365                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
366 #else
367     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
368                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
369 #endif
370   } else { /* distributed mat input */
371 #if defined(PETSC_USE_COMPLEX)
372     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
373 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
374     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
375 #else
376     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
377 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
378     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
379 #endif
380   }
381 
382   if (lu->MatInputMode == GLOBAL && size > 1){
383     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
384   }
385 
386   if (lu->options.PrintStat) {
387     if (size > 1){
388       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
389       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
390       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
391       time = time/size; /* average time */
392       if (!rank)
393         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
394                               %g / %g / %g\n",time_max,time_min,time);
395     } else {
396       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
397                               %g\n",time0);
398     }
399 
400     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
401   }
402   PStatFree(&stat);
403   if (size > 1){
404     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
405     F_diag->assembled = PETSC_TRUE;
406   }
407   (*F)->assembled  = PETSC_TRUE;
408   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
409   PetscFunctionReturn(0);
410 }
411 
412 /* Note the Petsc r and c permutations are ignored */
413 #undef __FUNCT__
414 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
415 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
416 {
417   Mat               B;
418   Mat_SuperLU_DIST  *lu;
419   PetscErrorCode    ierr;
420   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
421   PetscMPIInt       size;
422   superlu_options_t options;
423   PetscTruth        flg;
424   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
425   const char        *prtype[] = {"LargeDiag","NATURAL"};
426   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
427 
428   PetscFunctionBegin;
429   /* Create the factorization matrix */
430   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
431   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
432   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
433   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
434   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
435 
436   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
437   B->ops->solve            = MatSolve_SuperLU_DIST;
438   B->factor                = FACTOR_LU;
439 
440   lu = (Mat_SuperLU_DIST*)(B->spptr);
441 
442   /*   Set the default input options:
443         options.Fact = DOFACT;
444         options.Equil = YES;
445         options.ColPerm = MMD_AT_PLUS_A;
446         options.RowPerm = LargeDiag;
447         options.ReplaceTinyPivot = YES;
448         options.Trans = NOTRANS;
449         options.IterRefine = DOUBLE;
450         options.SolveInitialized = NO;
451         options.RefineInitialized = NO;
452         options.PrintStat = YES;
453   */
454   set_default_options_dist(&options);
455 
456   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
457   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
458 
459   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
460     lu->npcol = (PetscMPIInt)(sqrt(size)); /* Default num of process columns */
461     if (!lu->npcol) lu->npcol = 1;
462     lu->nprow = (PetscMPIInt)(size/lu->npcol);        /* Default num of process rows */
463     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
464     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
465     if (size != lu->nprow * lu->npcol)
466       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
467 
468     lu->MatInputMode = DISTRIBUTED;
469     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
470     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
471 
472     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
473     if (!flg) {
474       options.Equil = NO;
475     }
476 
477     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
478     if (flg) {
479       switch (indx) {
480       case 0:
481         options.RowPerm = LargeDiag;
482         break;
483       case 1:
484         options.RowPerm = NOROWPERM;
485         break;
486       }
487     }
488 
489     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr);
490     if (flg) {
491       switch (indx) {
492       case 0:
493         options.ColPerm = MMD_AT_PLUS_A;
494         break;
495       case 1:
496         options.ColPerm = NATURAL;
497         break;
498       case 2:
499         options.ColPerm = MMD_ATA;
500         break;
501       }
502     }
503 
504     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
505     if (!flg) {
506       options.ReplaceTinyPivot = NO;
507     }
508 
509     lu->FactPattern = SamePattern;
510     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
511     if (flg) {
512       switch (indx) {
513       case 0:
514         lu->FactPattern = SamePattern;
515         break;
516       case 1:
517         lu->FactPattern = SamePattern_SameRowPerm;
518         break;
519       }
520     }
521 
522     options.IterRefine = NOREFINE;
523     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
524     if (flg) {
525       options.IterRefine = DOUBLE;
526     }
527 
528     if (PetscLogPrintInfo) {
529       options.PrintStat = YES;
530     } else {
531       options.PrintStat = NO;
532     }
533     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
534                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
535   PetscOptionsEnd();
536 
537   /* Initialize the SuperLU process grid. */
538   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
539 
540   /* Initialize ScalePermstruct and LUstruct. */
541   ScalePermstructInit(M, N, &lu->ScalePermstruct);
542   LUstructInit(M, N, &lu->LUstruct);
543 
544   lu->options             = options;
545   lu->options.Fact        = DOFACT;
546   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
547   *F = B;
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
553 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
554   PetscErrorCode   ierr;
555   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
556 
557   PetscFunctionBegin;
558   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
559   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
560   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
566 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
567 {
568   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
569   superlu_options_t options;
570   PetscErrorCode    ierr;
571 
572   PetscFunctionBegin;
573   /* check if matrix is superlu_dist type */
574   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
575 
576   options = lu->options;
577   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
578   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
579   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
580   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
581   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
582   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
583   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
584   if (options.ColPerm == NATURAL) {
585     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
586   } else if (options.ColPerm == MMD_AT_PLUS_A) {
587     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
588   } else if (options.ColPerm == MMD_ATA) {
589     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
590   } else {
591     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
592   }
593 
594   if (lu->FactPattern == SamePattern){
595     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
596   } else {
597     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
598   }
599   PetscFunctionReturn(0);
600 }
601 
602 #undef __FUNCT__
603 #define __FUNCT__ "MatView_SuperLU_DIST"
604 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
605 {
606   PetscErrorCode    ierr;
607   PetscTruth        iascii;
608   PetscViewerFormat format;
609   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
610 
611   PetscFunctionBegin;
612   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
613 
614   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
615   if (iascii) {
616     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
617     if (format == PETSC_VIEWER_ASCII_INFO) {
618       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
619     }
620   }
621   PetscFunctionReturn(0);
622 }
623 
624 
625 EXTERN_C_BEGIN
626 #undef __FUNCT__
627 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
628 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
629 {
630   /* This routine is only called to convert to MATSUPERLU_DIST */
631   /* from MATSEQAIJ if A has a single process communicator */
632   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
633   PetscErrorCode   ierr;
634   PetscMPIInt      size;
635   MPI_Comm         comm;
636   Mat              B=*newmat;
637   Mat_SuperLU_DIST *lu;
638 
639   PetscFunctionBegin;
640   if (reuse == MAT_INITIAL_MATRIX) {
641     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
642   }
643 
644   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
645   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
646 
647   lu->MatDuplicate         = A->ops->duplicate;
648   lu->MatView              = A->ops->view;
649   lu->MatAssemblyEnd       = A->ops->assemblyend;
650   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
651   lu->MatDestroy           = A->ops->destroy;
652   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
653 
654   B->spptr                 = (void*)lu;
655   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
656   B->ops->view             = MatView_SuperLU_DIST;
657   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
658   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
659   B->ops->destroy          = MatDestroy_SuperLU_DIST;
660   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
661   if (size == 1) {
662     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
663                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
664     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
665                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
666   } else {
667     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
668                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
669     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
670                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
671   }
672   ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
673   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
674   *newmat = B;
675   PetscFunctionReturn(0);
676 }
677 EXTERN_C_END
678 
679 #undef __FUNCT__
680 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
681 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
682   PetscErrorCode   ierr;
683   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
684 
685   PetscFunctionBegin;
686   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
687   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
688   PetscFunctionReturn(0);
689 }
690 
691 /*MC
692   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
693   via the external package SuperLU_DIST.
694 
695   If SuperLU_DIST is installed (see the manual for
696   instructions on how to declare the existence of external packages),
697   a matrix type can be constructed which invokes SuperLU_DIST solvers.
698   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
699 
700   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
701   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
702   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
703   for communicators controlling multiple processes.  It is recommended that you call both of
704   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
705   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
706   without data copy.
707 
708   Options Database Keys:
709 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
710 . -mat_superlu_dist_r <n> - number of rows in processor partition
711 . -mat_superlu_dist_c <n> - number of columns in processor partition
712 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
713 . -mat_superlu_dist_equil - equilibrate the matrix
714 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
715 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
716 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
717 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
718 . -mat_superlu_dist_iterrefine - use iterative refinement
719 - -mat_superlu_dist_statprint - print factorization information
720 
721    Level: beginner
722 
723 .seealso: PCLU
724 M*/
725 
726 EXTERN_C_BEGIN
727 #undef __FUNCT__
728 #define __FUNCT__ "MatCreate_SuperLU_DIST"
729 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
730 {
731   PetscErrorCode ierr;
732   PetscMPIInt    size;
733 
734   PetscFunctionBegin;
735   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
736   /*   and SuperLU_DIST types */
737   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
738   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
739   if (size == 1) {
740     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
741   } else {
742     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
743     /*  A_diag = 0x0 ???  -- do we need it?
744     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
745     ierr = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
746     */
747   }
748   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
749   PetscFunctionReturn(0);
750 }
751 EXTERN_C_END
752 
753