xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 93e9e51b86eda45202704d308f609a7f204a28ec)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   fact_t                  FactPattern;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   /* A few function pointers for inheritance */
42   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
43   PetscErrorCode (*MatView)(Mat,PetscViewer);
44   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
45   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
46   PetscErrorCode (*MatDestroy)(Mat);
47 
48   /* Flag to clean up (non-global) SuperLU objects during Destroy */
49   PetscTruth CleanUpSuperLU_Dist;
50 } Mat_SuperLU_DIST;
51 
52 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
53 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
54 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
55 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
56 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
57 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
58 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
59 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
60 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *);
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
65 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
66 {
67   PetscErrorCode   ierr;
68   Mat              B=*newmat;
69   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
70 
71   PetscFunctionBegin;
72   if (reuse == MAT_INITIAL_MATRIX) {
73     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
74   }
75   /* Reset the original function pointers */
76   B->ops->duplicate        = lu->MatDuplicate;
77   B->ops->view             = lu->MatView;
78   B->ops->assemblyend      = lu->MatAssemblyEnd;
79   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
80   B->ops->destroy          = lu->MatDestroy;
81   ierr     = PetscFree(lu);CHKERRQ(ierr);
82   A->spptr = PETSC_NULL;
83 
84   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
85   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
86   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
87   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
88 
89   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
90   *newmat = B;
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
97 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
98 {
99   PetscErrorCode   ierr;
100   PetscMPIInt      size;
101   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
102 
103   PetscFunctionBegin;
104   if (lu->CleanUpSuperLU_Dist) {
105     /* Deallocate SuperLU_DIST storage */
106     if (lu->MatInputMode == GLOBAL) {
107       Destroy_CompCol_Matrix_dist(&lu->A_sup);
108     } else {
109       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
110       if ( lu->options.SolveInitialized ) {
111 #if defined(PETSC_USE_COMPLEX)
112         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
113 #else
114         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
115 #endif
116       }
117     }
118     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
119     ScalePermstructFree(&lu->ScalePermstruct);
120     LUstructFree(&lu->LUstruct);
121 
122     /* Release the SuperLU_DIST process grid. */
123     superlu_gridexit(&lu->grid);
124 
125     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
126   }
127 
128   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
129   if (size == 1) {
130     ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
131   } else {
132     ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
133   }
134   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSolve_SuperLU_DIST"
140 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
141 {
142   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
143   PetscErrorCode   ierr;
144   PetscMPIInt      size;
145   PetscInt         m=A->rmap.N, N=A->cmap.N;
146   SuperLUStat_t    stat;
147   double           berr[1];
148   PetscScalar      *bptr;
149   PetscInt         info, nrhs=1;
150   Vec              x_seq;
151   IS               iden;
152   VecScatter       scat;
153 
154   PetscFunctionBegin;
155   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
156   if (size > 1) {
157     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
158       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
159       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
160       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
161       ierr = ISDestroy(iden);CHKERRQ(ierr);
162 
163       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
164       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
165       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
166     } else { /* distributed mat input */
167       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
168       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
169     }
170   } else { /* size == 1 */
171     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
172     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
173   }
174 
175   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
176 
177   PStatInit(&stat);        /* Initialize the statistics variables. */
178   if (lu->MatInputMode == GLOBAL) {
179 #if defined(PETSC_USE_COMPLEX)
180     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
181                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
182 #else
183     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
184                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
185 #endif
186   } else { /* distributed mat input */
187 #if defined(PETSC_USE_COMPLEX)
188     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
189 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
190     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
191 #else
192     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
193 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
194     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
195 #endif
196   }
197   if (lu->options.PrintStat) {
198      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
199   }
200   PStatFree(&stat);
201 
202   if (size > 1) {
203     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
204       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
205       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
206       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
207       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
208       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
209     } else {
210       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
211     }
212   } else {
213     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
220 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
221 {
222   Mat              *tseq,A_seq = PETSC_NULL;
223   Mat_SeqAIJ       *aa,*bb;
224   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225   PetscErrorCode   ierr;
226   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
227                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
228   PetscMPIInt      size,rank;
229   SuperLUStat_t    stat;
230   double           *berr=0;
231   IS               isrow;
232   PetscLogDouble   time0,time,time_min,time_max;
233   Mat              F_diag=PETSC_NULL;
234 #if defined(PETSC_USE_COMPLEX)
235   doublecomplex    *av, *bv;
236 #else
237   double           *av, *bv;
238 #endif
239 
240   PetscFunctionBegin;
241   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
242   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
243 
244   if (lu->options.PrintStat) { /* collect time for mat conversion */
245     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
246     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
247   }
248 
249   if (lu->MatInputMode == GLOBAL) { /* global mat input */
250     if (size > 1) { /* convert mpi A to seq mat A */
251       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
252       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
253       ierr = ISDestroy(isrow);CHKERRQ(ierr);
254 
255       A_seq = *tseq;
256       ierr = PetscFree(tseq);CHKERRQ(ierr);
257       aa =  (Mat_SeqAIJ*)A_seq->data;
258     } else {
259       aa =  (Mat_SeqAIJ*)A->data;
260     }
261 
262     /* Convert Petsc NR matrix to SuperLU_DIST NC.
263        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
264     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
265       if (lu->FactPattern == SamePattern_SameRowPerm){
266         Destroy_CompCol_Matrix_dist(&lu->A_sup);
267         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
268         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
269       } else {
270         Destroy_CompCol_Matrix_dist(&lu->A_sup);
271         Destroy_LU(N, &lu->grid, &lu->LUstruct);
272         lu->options.Fact = SamePattern;
273       }
274     }
275 #if defined(PETSC_USE_COMPLEX)
276     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
277 #else
278     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
279 #endif
280 
281     /* Create compressed column matrix A_sup. */
282 #if defined(PETSC_USE_COMPLEX)
283     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
284 #else
285     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
286 #endif
287   } else { /* distributed mat input */
288     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
289     aa=(Mat_SeqAIJ*)(mat->A)->data;
290     bb=(Mat_SeqAIJ*)(mat->B)->data;
291     ai=aa->i; aj=aa->j;
292     bi=bb->i; bj=bb->j;
293 #if defined(PETSC_USE_COMPLEX)
294     av=(doublecomplex*)aa->a;
295     bv=(doublecomplex*)bb->a;
296 #else
297     av=aa->a;
298     bv=bb->a;
299 #endif
300     rstart = A->rmap.rstart;
301     nz     = aa->nz + bb->nz;
302     garray = mat->garray;
303 
304     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
305 #if defined(PETSC_USE_COMPLEX)
306       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
307 #else
308       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309 #endif
310     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
311       if (lu->FactPattern == SamePattern_SameRowPerm){
312         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
313         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
314       } else {
315         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
316         lu->options.Fact = SamePattern;
317       }
318     }
319     nz = 0; irow = rstart;
320     for ( i=0; i<m; i++ ) {
321       lu->row[i] = nz;
322       countA = ai[i+1] - ai[i];
323       countB = bi[i+1] - bi[i];
324       ajj = aj + ai[i];  /* ptr to the beginning of this row */
325       bjj = bj + bi[i];
326 
327       /* B part, smaller col index */
328       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
329       jB = 0;
330       for (j=0; j<countB; j++){
331         jcol = garray[bjj[j]];
332         if (jcol > colA_start) {
333           jB = j;
334           break;
335         }
336         lu->col[nz] = jcol;
337         lu->val[nz++] = *bv++;
338         if (j==countB-1) jB = countB;
339       }
340 
341       /* A part */
342       for (j=0; j<countA; j++){
343         lu->col[nz] = rstart + ajj[j];
344         lu->val[nz++] = *av++;
345       }
346 
347       /* B part, larger col index */
348       for (j=jB; j<countB; j++){
349         lu->col[nz] = garray[bjj[j]];
350         lu->val[nz++] = *bv++;
351       }
352     }
353     lu->row[m] = nz;
354 #if defined(PETSC_USE_COMPLEX)
355     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
356 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
357 #else
358     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
359 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
360 #endif
361   }
362   if (lu->options.PrintStat) {
363     ierr = PetscGetTime(&time);CHKERRQ(ierr);
364     time0 = time - time0;
365   }
366 
367   /* Factor the matrix. */
368   PStatInit(&stat);   /* Initialize the statistics variables. */
369 
370   if (lu->MatInputMode == GLOBAL) { /* global mat input */
371 #if defined(PETSC_USE_COMPLEX)
372     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
373                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
374 #else
375     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
376                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
377 #endif
378   } else { /* distributed mat input */
379 #if defined(PETSC_USE_COMPLEX)
380     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
381 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
382     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
383 #else
384     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
385 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
386     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
387 #endif
388   }
389 
390   if (lu->MatInputMode == GLOBAL && size > 1){
391     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
392   }
393 
394   if (lu->options.PrintStat) {
395     if (size > 1){
396       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
397       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
398       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
399       time = time/size; /* average time */
400       if (!rank) {
401         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
402     } else {
403       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \    %g\n",time0);CHKERRQ(ierr);
404     }
405     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
406   }
407   PStatFree(&stat);
408   if (size > 1){
409     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
410     F_diag->assembled = PETSC_TRUE;
411   }
412   (*F)->assembled  = PETSC_TRUE;
413   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
414   PetscFunctionReturn(0);
415 }
416 
417 /* Note the Petsc r and c permutations are ignored */
418 #undef __FUNCT__
419 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
420 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
421 {
422   Mat               B;
423   Mat_SuperLU_DIST  *lu;
424   PetscErrorCode    ierr;
425   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
426   PetscMPIInt       size;
427   superlu_options_t options;
428   PetscTruth        flg;
429   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
430   const char        *prtype[] = {"LargeDiag","NATURAL"};
431   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
432 
433   PetscFunctionBegin;
434   /* Create the factorization matrix */
435   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
436   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
437   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
438   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
439   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
440 
441   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
442   B->ops->solve            = MatSolve_SuperLU_DIST;
443   B->factor                = FACTOR_LU;
444 
445   lu = (Mat_SuperLU_DIST*)(B->spptr);
446 
447   /*   Set the default input options:
448         options.Fact = DOFACT;
449         options.Equil = YES;
450         options.ColPerm = MMD_AT_PLUS_A;
451         options.RowPerm = LargeDiag;
452         options.ReplaceTinyPivot = YES;
453         options.Trans = NOTRANS;
454         options.IterRefine = DOUBLE;
455         options.SolveInitialized = NO;
456         options.RefineInitialized = NO;
457         options.PrintStat = YES;
458   */
459   set_default_options_dist(&options);
460 
461   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
462   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
463   /* Default num of process columns and rows */
464   lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size));
465   if (!lu->npcol) lu->npcol = 1;
466   while (lu->npcol > 0) {
467     lu->nprow = (PetscMPIInt)(size/lu->npcol);
468     if (size == lu->nprow * lu->npcol) break;
469     lu->npcol --;
470   }
471 
472   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
473     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
474     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
475     if (size != lu->nprow * lu->npcol)
476       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
477 
478     lu->MatInputMode = DISTRIBUTED;
479     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
480     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
481 
482     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
483     if (!flg) {
484       options.Equil = NO;
485     }
486 
487     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
488     if (flg) {
489       switch (indx) {
490       case 0:
491         options.RowPerm = LargeDiag;
492         break;
493       case 1:
494         options.RowPerm = NOROWPERM;
495         break;
496       }
497     }
498 
499     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr);
500     if (flg) {
501       switch (indx) {
502       case 0:
503         options.ColPerm = MMD_AT_PLUS_A;
504         break;
505       case 1:
506         options.ColPerm = NATURAL;
507         break;
508       case 2:
509         options.ColPerm = MMD_ATA;
510         break;
511       }
512     }
513 
514     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
515     if (!flg) {
516       options.ReplaceTinyPivot = NO;
517     }
518 
519     lu->FactPattern = SamePattern;
520     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
521     if (flg) {
522       switch (indx) {
523       case 0:
524         lu->FactPattern = SamePattern;
525         break;
526       case 1:
527         lu->FactPattern = SamePattern_SameRowPerm;
528         break;
529       }
530     }
531 
532     options.IterRefine = NOREFINE;
533     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
534     if (flg) {
535       options.IterRefine = DOUBLE;
536     }
537 
538     if (PetscLogPrintInfo) {
539       options.PrintStat = YES;
540     } else {
541       options.PrintStat = NO;
542     }
543     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
544                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
545   PetscOptionsEnd();
546 
547   /* Initialize the SuperLU process grid. */
548   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
549 
550   /* Initialize ScalePermstruct and LUstruct. */
551   ScalePermstructInit(M, N, &lu->ScalePermstruct);
552   LUstructInit(M, N, &lu->LUstruct);
553 
554   lu->options             = options;
555   lu->options.Fact        = DOFACT;
556   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
557   *F = B;
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
563 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
564   PetscErrorCode   ierr;
565   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
566 
567   PetscFunctionBegin;
568   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
569   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
570   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
576 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
577 {
578   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
579   superlu_options_t options;
580   PetscErrorCode    ierr;
581 
582   PetscFunctionBegin;
583   /* check if matrix is superlu_dist type */
584   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
585 
586   options = lu->options;
587   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
588   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
589   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
590   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
591   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
592   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
593   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
594   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
595   if (options.ColPerm == NATURAL) {
596     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
597   } else if (options.ColPerm == MMD_AT_PLUS_A) {
598     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
599   } else if (options.ColPerm == MMD_ATA) {
600     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
601   } else {
602     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
603   }
604 
605   if (lu->FactPattern == SamePattern){
606     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
607   } else {
608     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
609   }
610   PetscFunctionReturn(0);
611 }
612 
613 #undef __FUNCT__
614 #define __FUNCT__ "MatView_SuperLU_DIST"
615 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
616 {
617   PetscErrorCode    ierr;
618   PetscTruth        iascii;
619   PetscViewerFormat format;
620   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
621 
622   PetscFunctionBegin;
623   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
624 
625   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
626   if (iascii) {
627     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
628     if (format == PETSC_VIEWER_ASCII_INFO) {
629       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 
636 EXTERN_C_BEGIN
637 #undef __FUNCT__
638 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
639 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
640 {
641   /* This routine is only called to convert to MATSUPERLU_DIST */
642   /* from MATSEQAIJ if A has a single process communicator */
643   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
644   PetscErrorCode   ierr;
645   PetscMPIInt      size;
646   MPI_Comm         comm;
647   Mat              B=*newmat;
648   Mat_SuperLU_DIST *lu;
649 
650   PetscFunctionBegin;
651   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
652   if (reuse == MAT_INITIAL_MATRIX) {
653     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
654     lu->MatDuplicate         = B->ops->duplicate;
655     lu->MatView              = B->ops->view;
656     lu->MatAssemblyEnd       = B->ops->assemblyend;
657     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
658     lu->MatDestroy           = B->ops->destroy;
659   } else {
660     lu->MatDuplicate         = A->ops->duplicate;
661     lu->MatView              = A->ops->view;
662     lu->MatAssemblyEnd       = A->ops->assemblyend;
663     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
664     lu->MatDestroy           = A->ops->destroy;
665   }
666   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
667 
668   B->spptr                 = (void*)lu;
669   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
670   B->ops->view             = MatView_SuperLU_DIST;
671   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
672   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
673   B->ops->destroy          = MatDestroy_SuperLU_DIST;
674 
675   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
676   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
677   if (size == 1) {
678     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
679     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
680     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
681     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
682   } else {
683     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
684                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
685     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
686                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
687   }
688   ierr = PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
689   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
690   *newmat = B;
691   PetscFunctionReturn(0);
692 }
693 EXTERN_C_END
694 
695 #undef __FUNCT__
696 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
697 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
698   PetscErrorCode   ierr;
699   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
700 
701   PetscFunctionBegin;
702   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
703   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
704   PetscFunctionReturn(0);
705 }
706 
707 /*MC
708   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
709   via the external package SuperLU_DIST.
710 
711   If SuperLU_DIST is installed (see the manual for
712   instructions on how to declare the existence of external packages),
713   a matrix type can be constructed which invokes SuperLU_DIST solvers.
714   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
715   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
716   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
717 
718   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
719   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
720   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
721   for communicators controlling multiple processes.  It is recommended that you call both of
722   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
723   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
724   without data copy; but this MUST be called AFTER the matrix values are set.
725 
726 
727 
728   Options Database Keys:
729 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
730 . -mat_superlu_dist_r <n> - number of rows in processor partition
731 . -mat_superlu_dist_c <n> - number of columns in processor partition
732 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
733 . -mat_superlu_dist_equil - equilibrate the matrix
734 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
735 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
736 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
737 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
738 . -mat_superlu_dist_iterrefine - use iterative refinement
739 - -mat_superlu_dist_statprint - print factorization information
740 
741    Level: beginner
742 
743 .seealso: PCLU
744 M*/
745 
746 EXTERN_C_BEGIN
747 #undef __FUNCT__
748 #define __FUNCT__ "MatCreate_SuperLU_DIST"
749 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
750 {
751   PetscErrorCode ierr;
752   PetscMPIInt    size;
753 
754   PetscFunctionBegin;
755   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
756   if (size == 1) {
757     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
758   } else {
759     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
760     /*  A_diag = 0x0 ???  -- do we need it?
761     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
762     ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
763     */
764   }
765   ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
766   PetscFunctionReturn(0);
767 }
768 EXTERN_C_END
769 
770