xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision a4fd02ac6712e7c4a27c9ffdf1171591c41a72f0)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   fact_t                  FactPattern;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   /* A few function pointers for inheritance */
42   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
43   PetscErrorCode (*MatView)(Mat,PetscViewer);
44   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
45   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
46   PetscErrorCode (*MatDestroy)(Mat);
47 
48   /* Flag to clean up (non-global) SuperLU objects during Destroy */
49   PetscTruth CleanUpSuperLU_Dist;
50 } Mat_SuperLU_DIST;
51 
52 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
53 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
54 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
55 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
56 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
57 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
58 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
59 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
60 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *);
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
65 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
66 {
67   PetscErrorCode   ierr;
68   Mat              B=*newmat;
69   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
70 
71   PetscFunctionBegin;
72   if (reuse == MAT_INITIAL_MATRIX) {
73     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
74   }
75   /* Reset the original function pointers */
76   B->ops->duplicate        = lu->MatDuplicate;
77   B->ops->view             = lu->MatView;
78   B->ops->assemblyend      = lu->MatAssemblyEnd;
79   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
80   B->ops->destroy          = lu->MatDestroy;
81   ierr     = PetscFree(lu);CHKERRQ(ierr);
82   A->spptr = PETSC_NULL;
83 
84   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
85   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
86   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
87   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
88 
89   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
90   *newmat = B;
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
97 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
98 {
99   PetscErrorCode   ierr;
100   PetscMPIInt      size;
101   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
102 
103   PetscFunctionBegin;
104   if (lu->CleanUpSuperLU_Dist) {
105     /* Deallocate SuperLU_DIST storage */
106     if (lu->MatInputMode == GLOBAL) {
107       Destroy_CompCol_Matrix_dist(&lu->A_sup);
108     } else {
109       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
110       if ( lu->options.SolveInitialized ) {
111 #if defined(PETSC_USE_COMPLEX)
112         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
113 #else
114         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
115 #endif
116       }
117     }
118     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
119     ScalePermstructFree(&lu->ScalePermstruct);
120     LUstructFree(&lu->LUstruct);
121 
122     /* Release the SuperLU_DIST process grid. */
123     superlu_gridexit(&lu->grid);
124 
125     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
126   }
127 
128   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
129   if (size == 1) {
130     ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
131   } else {
132     ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
133   }
134   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSolve_SuperLU_DIST"
140 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
141 {
142   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
143   PetscErrorCode   ierr;
144   PetscMPIInt      size;
145   PetscInt         m=A->rmap.N, N=A->cmap.N;
146   SuperLUStat_t    stat;
147   double           berr[1];
148   PetscScalar      *bptr;
149   PetscInt         info, nrhs=1;
150   Vec              x_seq;
151   IS               iden;
152   VecScatter       scat;
153 
154   PetscFunctionBegin;
155   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
156   if (size > 1) {
157     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
158       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
159       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
160       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
161       ierr = ISDestroy(iden);CHKERRQ(ierr);
162 
163       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
164       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
165       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
166     } else { /* distributed mat input */
167       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
168       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
169     }
170   } else { /* size == 1 */
171     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
172     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
173   }
174 
175   if (lu->options.Fact != FACTORED)
176     SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
177 
178   PStatInit(&stat);        /* Initialize the statistics variables. */
179   if (lu->MatInputMode == GLOBAL) {
180 #if defined(PETSC_USE_COMPLEX)
181     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
182                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
183 #else
184     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
185                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
186 #endif
187   } else { /* distributed mat input */
188 #if defined(PETSC_USE_COMPLEX)
189     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
190 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
191     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
192 #else
193     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
194 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
195     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
196 #endif
197   }
198   if (lu->options.PrintStat) {
199      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
200   }
201   PStatFree(&stat);
202 
203   if (size > 1) {
204     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
205       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
206       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
207       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
208       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
209       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
210     } else {
211       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
212     }
213   } else {
214     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
215   }
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
221 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
222 {
223   Mat              *tseq,A_seq = PETSC_NULL;
224   Mat_SeqAIJ       *aa,*bb;
225   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
226   PetscErrorCode   ierr;
227   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
228                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
229   PetscMPIInt      size,rank;
230   SuperLUStat_t    stat;
231   double           *berr=0;
232   IS               isrow;
233   PetscLogDouble   time0,time,time_min,time_max;
234   Mat              F_diag=PETSC_NULL;
235 #if defined(PETSC_USE_COMPLEX)
236   doublecomplex    *av, *bv;
237 #else
238   double           *av, *bv;
239 #endif
240 
241   PetscFunctionBegin;
242   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
243   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
244 
245   if (lu->options.PrintStat) { /* collect time for mat conversion */
246     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
247     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
248   }
249 
250   if (lu->MatInputMode == GLOBAL) { /* global mat input */
251     if (size > 1) { /* convert mpi A to seq mat A */
252       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
253       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
254       ierr = ISDestroy(isrow);CHKERRQ(ierr);
255 
256       A_seq = *tseq;
257       ierr = PetscFree(tseq);CHKERRQ(ierr);
258       aa =  (Mat_SeqAIJ*)A_seq->data;
259     } else {
260       aa =  (Mat_SeqAIJ*)A->data;
261     }
262 
263     /* Convert Petsc NR matrix to SuperLU_DIST NC.
264        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
265     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
266       if (lu->FactPattern == SamePattern_SameRowPerm){
267         Destroy_CompCol_Matrix_dist(&lu->A_sup);
268         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
269         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
270       } else {
271         Destroy_CompCol_Matrix_dist(&lu->A_sup);
272         Destroy_LU(N, &lu->grid, &lu->LUstruct);
273         lu->options.Fact = SamePattern;
274       }
275     }
276 #if defined(PETSC_USE_COMPLEX)
277     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
278 #else
279     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
280 #endif
281 
282     /* Create compressed column matrix A_sup. */
283 #if defined(PETSC_USE_COMPLEX)
284     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
285 #else
286     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
287 #endif
288   } else { /* distributed mat input */
289     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
290     aa=(Mat_SeqAIJ*)(mat->A)->data;
291     bb=(Mat_SeqAIJ*)(mat->B)->data;
292     ai=aa->i; aj=aa->j;
293     bi=bb->i; bj=bb->j;
294 #if defined(PETSC_USE_COMPLEX)
295     av=(doublecomplex*)aa->a;
296     bv=(doublecomplex*)bb->a;
297 #else
298     av=aa->a;
299     bv=bb->a;
300 #endif
301     rstart = A->rmap.rstart;
302     nz     = aa->nz + bb->nz;
303     garray = mat->garray;
304 
305     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
306 #if defined(PETSC_USE_COMPLEX)
307       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
308 #else
309       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
310 #endif
311     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
312       if (lu->FactPattern == SamePattern_SameRowPerm){
313         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
314         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
315       } else {
316         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
317         lu->options.Fact = SamePattern;
318       }
319     }
320     nz = 0; irow = rstart;
321     for ( i=0; i<m; i++ ) {
322       lu->row[i] = nz;
323       countA = ai[i+1] - ai[i];
324       countB = bi[i+1] - bi[i];
325       ajj = aj + ai[i];  /* ptr to the beginning of this row */
326       bjj = bj + bi[i];
327 
328       /* B part, smaller col index */
329       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
330       jB = 0;
331       for (j=0; j<countB; j++){
332         jcol = garray[bjj[j]];
333         if (jcol > colA_start) {
334           jB = j;
335           break;
336         }
337         lu->col[nz] = jcol;
338         lu->val[nz++] = *bv++;
339         if (j==countB-1) jB = countB;
340       }
341 
342       /* A part */
343       for (j=0; j<countA; j++){
344         lu->col[nz] = rstart + ajj[j];
345         lu->val[nz++] = *av++;
346       }
347 
348       /* B part, larger col index */
349       for (j=jB; j<countB; j++){
350         lu->col[nz] = garray[bjj[j]];
351         lu->val[nz++] = *bv++;
352       }
353     }
354     lu->row[m] = nz;
355 #if defined(PETSC_USE_COMPLEX)
356     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
357 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
358 #else
359     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
360 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
361 #endif
362   }
363   if (lu->options.PrintStat) {
364     ierr = PetscGetTime(&time);CHKERRQ(ierr);
365     time0 = time - time0;
366   }
367 
368   /* Factor the matrix. */
369   PStatInit(&stat);   /* Initialize the statistics variables. */
370 
371   if (lu->MatInputMode == GLOBAL) { /* global mat input */
372 #if defined(PETSC_USE_COMPLEX)
373     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
374                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
375 #else
376     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
377                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
378 #endif
379   } else { /* distributed mat input */
380 #if defined(PETSC_USE_COMPLEX)
381     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
382 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
383     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
384 #else
385     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
386 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
387     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
388 #endif
389   }
390 
391   if (lu->MatInputMode == GLOBAL && size > 1){
392     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
393   }
394 
395   if (lu->options.PrintStat) {
396     if (size > 1){
397       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
398       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
399       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
400       time = time/size; /* average time */
401       if (!rank)
402         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
403                               %g / %g / %g\n",time_max,time_min,time);
404     } else {
405       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
406                               %g\n",time0);
407     }
408 
409     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
410   }
411   PStatFree(&stat);
412   if (size > 1){
413     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
414     F_diag->assembled = PETSC_TRUE;
415   }
416   (*F)->assembled  = PETSC_TRUE;
417   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
418   PetscFunctionReturn(0);
419 }
420 
421 /* Note the Petsc r and c permutations are ignored */
422 #undef __FUNCT__
423 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
424 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
425 {
426   Mat               B;
427   Mat_SuperLU_DIST  *lu;
428   PetscErrorCode    ierr;
429   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
430   PetscMPIInt       size;
431   superlu_options_t options;
432   PetscTruth        flg;
433   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
434   const char        *prtype[] = {"LargeDiag","NATURAL"};
435   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
436 
437   PetscFunctionBegin;
438   /* Create the factorization matrix */
439   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
440   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
441   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
442   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
443   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
444 
445   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
446   B->ops->solve            = MatSolve_SuperLU_DIST;
447   B->factor                = FACTOR_LU;
448 
449   lu = (Mat_SuperLU_DIST*)(B->spptr);
450 
451   /*   Set the default input options:
452         options.Fact = DOFACT;
453         options.Equil = YES;
454         options.ColPerm = MMD_AT_PLUS_A;
455         options.RowPerm = LargeDiag;
456         options.ReplaceTinyPivot = YES;
457         options.Trans = NOTRANS;
458         options.IterRefine = DOUBLE;
459         options.SolveInitialized = NO;
460         options.RefineInitialized = NO;
461         options.PrintStat = YES;
462   */
463   set_default_options_dist(&options);
464 
465   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
466   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
467   /* Default num of process columns and rows */
468   lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size));
469   if (!lu->npcol) lu->npcol = 1;
470   while (lu->npcol > 0) {
471     lu->nprow = (PetscMPIInt)(size/lu->npcol);
472     if (size == lu->nprow * lu->npcol) break;
473     lu->npcol --;
474   }
475 
476   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
477     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
478     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
479     if (size != lu->nprow * lu->npcol)
480       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
481 
482     lu->MatInputMode = DISTRIBUTED;
483     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
484     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
485 
486     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
487     if (!flg) {
488       options.Equil = NO;
489     }
490 
491     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
492     if (flg) {
493       switch (indx) {
494       case 0:
495         options.RowPerm = LargeDiag;
496         break;
497       case 1:
498         options.RowPerm = NOROWPERM;
499         break;
500       }
501     }
502 
503     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr);
504     if (flg) {
505       switch (indx) {
506       case 0:
507         options.ColPerm = MMD_AT_PLUS_A;
508         break;
509       case 1:
510         options.ColPerm = NATURAL;
511         break;
512       case 2:
513         options.ColPerm = MMD_ATA;
514         break;
515       }
516     }
517 
518     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
519     if (!flg) {
520       options.ReplaceTinyPivot = NO;
521     }
522 
523     lu->FactPattern = SamePattern;
524     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
525     if (flg) {
526       switch (indx) {
527       case 0:
528         lu->FactPattern = SamePattern;
529         break;
530       case 1:
531         lu->FactPattern = SamePattern_SameRowPerm;
532         break;
533       }
534     }
535 
536     options.IterRefine = NOREFINE;
537     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
538     if (flg) {
539       options.IterRefine = DOUBLE;
540     }
541 
542     if (PetscLogPrintInfo) {
543       options.PrintStat = YES;
544     } else {
545       options.PrintStat = NO;
546     }
547     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
548                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
549   PetscOptionsEnd();
550 
551   /* Initialize the SuperLU process grid. */
552   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
553 
554   /* Initialize ScalePermstruct and LUstruct. */
555   ScalePermstructInit(M, N, &lu->ScalePermstruct);
556   LUstructInit(M, N, &lu->LUstruct);
557 
558   lu->options             = options;
559   lu->options.Fact        = DOFACT;
560   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
561   *F = B;
562   PetscFunctionReturn(0);
563 }
564 
565 #undef __FUNCT__
566 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
567 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
568   PetscErrorCode   ierr;
569   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
570 
571   PetscFunctionBegin;
572   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
573   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
574   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
575   PetscFunctionReturn(0);
576 }
577 
578 #undef __FUNCT__
579 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
580 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
581 {
582   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
583   superlu_options_t options;
584   PetscErrorCode    ierr;
585 
586   PetscFunctionBegin;
587   /* check if matrix is superlu_dist type */
588   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
589 
590   options = lu->options;
591   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
592   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
593   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
594   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
595   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
596   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
597   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
598   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
599   if (options.ColPerm == NATURAL) {
600     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
601   } else if (options.ColPerm == MMD_AT_PLUS_A) {
602     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
603   } else if (options.ColPerm == MMD_ATA) {
604     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
605   } else {
606     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
607   }
608 
609   if (lu->FactPattern == SamePattern){
610     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
611   } else {
612     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
613   }
614   PetscFunctionReturn(0);
615 }
616 
617 #undef __FUNCT__
618 #define __FUNCT__ "MatView_SuperLU_DIST"
619 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
620 {
621   PetscErrorCode    ierr;
622   PetscTruth        iascii;
623   PetscViewerFormat format;
624   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
625 
626   PetscFunctionBegin;
627   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
628 
629   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
630   if (iascii) {
631     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
632     if (format == PETSC_VIEWER_ASCII_INFO) {
633       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
634     }
635   }
636   PetscFunctionReturn(0);
637 }
638 
639 
640 EXTERN_C_BEGIN
641 #undef __FUNCT__
642 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
643 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
644 {
645   /* This routine is only called to convert to MATSUPERLU_DIST */
646   /* from MATSEQAIJ if A has a single process communicator */
647   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
648   PetscErrorCode   ierr;
649   PetscMPIInt      size;
650   MPI_Comm         comm;
651   Mat              B=*newmat;
652   Mat_SuperLU_DIST *lu;
653 
654   PetscFunctionBegin;
655   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
656   if (reuse == MAT_INITIAL_MATRIX) {
657     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
658     lu->MatDuplicate         = B->ops->duplicate;
659     lu->MatView              = B->ops->view;
660     lu->MatAssemblyEnd       = B->ops->assemblyend;
661     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
662     lu->MatDestroy           = B->ops->destroy;
663   } else {
664     lu->MatDuplicate         = A->ops->duplicate;
665     lu->MatView              = A->ops->view;
666     lu->MatAssemblyEnd       = A->ops->assemblyend;
667     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
668     lu->MatDestroy           = A->ops->destroy;
669   }
670   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
671 
672   B->spptr                 = (void*)lu;
673   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
674   B->ops->view             = MatView_SuperLU_DIST;
675   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
676   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
677   B->ops->destroy          = MatDestroy_SuperLU_DIST;
678 
679   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
680   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
681   if (size == 1) {
682     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
683     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
684     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
685     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
686   } else {
687     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
688                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
689     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
690                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
691   }
692   ierr = PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
693   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
694   *newmat = B;
695   PetscFunctionReturn(0);
696 }
697 EXTERN_C_END
698 
699 #undef __FUNCT__
700 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
701 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
702   PetscErrorCode   ierr;
703   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
704 
705   PetscFunctionBegin;
706   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
707   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
708   PetscFunctionReturn(0);
709 }
710 
711 /*MC
712   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
713   via the external package SuperLU_DIST.
714 
715   If SuperLU_DIST is installed (see the manual for
716   instructions on how to declare the existence of external packages),
717   a matrix type can be constructed which invokes SuperLU_DIST solvers.
718   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
719   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
720   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
721 
722   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
723   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
724   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
725   for communicators controlling multiple processes.  It is recommended that you call both of
726   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
727   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
728   without data copy; but this MUST be called AFTER the matrix values are set.
729 
730 
731 
732   Options Database Keys:
733 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
734 . -mat_superlu_dist_r <n> - number of rows in processor partition
735 . -mat_superlu_dist_c <n> - number of columns in processor partition
736 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
737 . -mat_superlu_dist_equil - equilibrate the matrix
738 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
739 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
740 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
741 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
742 . -mat_superlu_dist_iterrefine - use iterative refinement
743 - -mat_superlu_dist_statprint - print factorization information
744 
745    Level: beginner
746 
747 .seealso: PCLU
748 M*/
749 
750 EXTERN_C_BEGIN
751 #undef __FUNCT__
752 #define __FUNCT__ "MatCreate_SuperLU_DIST"
753 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
754 {
755   PetscErrorCode ierr;
756   PetscMPIInt    size;
757 
758   PetscFunctionBegin;
759   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
760   if (size == 1) {
761     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
762   } else {
763     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
764     /*  A_diag = 0x0 ???  -- do we need it?
765     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
766     ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
767     */
768   }
769   ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
770   PetscFunctionReturn(0);
771 }
772 EXTERN_C_END
773 
774