xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 5aa7ee0f21100a07d39dbbadae00f3cd321aa91e)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.1 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   fact_t                  FactPattern;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   /* A few function pointers for inheritance */
42   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
43   PetscErrorCode (*MatView)(Mat,PetscViewer);
44   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
45   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
46   PetscErrorCode (*MatDestroy)(Mat);
47 
48   /* Flag to clean up (non-global) SuperLU objects during Destroy */
49   PetscTruth CleanUpSuperLU_Dist;
50 } Mat_SuperLU_DIST;
51 
52 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
53 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
54 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
55 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
56 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
57 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
58 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
59 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
60 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *);
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
65 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
66 {
67   PetscErrorCode   ierr;
68   Mat              B=*newmat;
69   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
70 
71   PetscFunctionBegin;
72   if (reuse == MAT_INITIAL_MATRIX) {
73     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
74   }
75   /* Reset the original function pointers */
76   B->ops->duplicate        = lu->MatDuplicate;
77   B->ops->view             = lu->MatView;
78   B->ops->assemblyend      = lu->MatAssemblyEnd;
79   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
80   B->ops->destroy          = lu->MatDestroy;
81   ierr     = PetscFree(lu);CHKERRQ(ierr);
82   A->spptr = PETSC_NULL;
83 
84   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
85   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
86   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
87   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
88 
89   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
90   *newmat = B;
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
97 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
98 {
99   PetscErrorCode   ierr;
100   PetscMPIInt      size;
101   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
102 
103   PetscFunctionBegin;
104   if (lu->CleanUpSuperLU_Dist) {
105     /* Deallocate SuperLU_DIST storage */
106     if (lu->MatInputMode == GLOBAL) {
107       Destroy_CompCol_Matrix_dist(&lu->A_sup);
108     } else {
109       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
110       if ( lu->options.SolveInitialized ) {
111 #if defined(PETSC_USE_COMPLEX)
112         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
113 #else
114         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
115 #endif
116       }
117     }
118     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
119     ScalePermstructFree(&lu->ScalePermstruct);
120     LUstructFree(&lu->LUstruct);
121 
122     /* Release the SuperLU_DIST process grid. */
123     superlu_gridexit(&lu->grid);
124 
125     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
126   }
127 
128   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
129   if (size == 1) {
130     ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
131   } else {
132     ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
133   }
134   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
135   PetscFunctionReturn(0);
136 }
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSolve_SuperLU_DIST"
140 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
141 {
142   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
143   PetscErrorCode   ierr;
144   PetscMPIInt      size;
145   PetscInt         m=A->rmap.N, N=A->cmap.N;
146   SuperLUStat_t    stat;
147   double           berr[1];
148   PetscScalar      *bptr;
149   PetscInt         info, nrhs=1;
150   Vec              x_seq;
151   IS               iden;
152   VecScatter       scat;
153 
154   PetscFunctionBegin;
155   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
156   if (size > 1) {
157     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
158       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
159       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
160       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
161       ierr = ISDestroy(iden);CHKERRQ(ierr);
162 
163       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
164       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
165       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
166     } else { /* distributed mat input */
167       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
168       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
169     }
170   } else { /* size == 1 */
171     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
172     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
173   }
174 
175   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
176 
177   PStatInit(&stat);        /* Initialize the statistics variables. */
178   if (lu->MatInputMode == GLOBAL) {
179 #if defined(PETSC_USE_COMPLEX)
180     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
181                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
182 #else
183     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
184                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
185 #endif
186   } else { /* distributed mat input */
187 #if defined(PETSC_USE_COMPLEX)
188     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
189 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
190     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
191 #else
192     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
193 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
194     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
195 #endif
196   }
197   if (lu->options.PrintStat) {
198      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
199   }
200   PStatFree(&stat);
201 
202   if (size > 1) {
203     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
204       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
205       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
206       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
207       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
208       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
209     } else {
210       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
211     }
212   } else {
213     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
220 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
221 {
222   Mat              *tseq,A_seq = PETSC_NULL;
223   Mat_SeqAIJ       *aa,*bb;
224   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225   PetscErrorCode   ierr;
226   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
227                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
228   PetscMPIInt      size,rank;
229   SuperLUStat_t    stat;
230   double           *berr=0;
231   IS               isrow;
232   PetscLogDouble   time0,time,time_min,time_max;
233   Mat              F_diag=PETSC_NULL;
234 #if defined(PETSC_USE_COMPLEX)
235   doublecomplex    *av, *bv;
236 #else
237   double           *av, *bv;
238 #endif
239 
240   PetscFunctionBegin;
241   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
242   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
243 
244   if (lu->options.PrintStat) { /* collect time for mat conversion */
245     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
246     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
247   }
248 
249   if (lu->MatInputMode == GLOBAL) { /* global mat input */
250     if (size > 1) { /* convert mpi A to seq mat A */
251       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
252       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
253       ierr = ISDestroy(isrow);CHKERRQ(ierr);
254 
255       A_seq = *tseq;
256       ierr = PetscFree(tseq);CHKERRQ(ierr);
257       aa =  (Mat_SeqAIJ*)A_seq->data;
258     } else {
259       aa =  (Mat_SeqAIJ*)A->data;
260     }
261 
262     /* Convert Petsc NR matrix to SuperLU_DIST NC.
263        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
264     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
265       if (lu->FactPattern == SamePattern_SameRowPerm){
266         Destroy_CompCol_Matrix_dist(&lu->A_sup);
267         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
268         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
269       } else {
270         Destroy_CompCol_Matrix_dist(&lu->A_sup);
271         Destroy_LU(N, &lu->grid, &lu->LUstruct);
272         lu->options.Fact = SamePattern;
273       }
274     }
275 #if defined(PETSC_USE_COMPLEX)
276     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
277 #else
278     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
279 #endif
280 
281     /* Create compressed column matrix A_sup. */
282 #if defined(PETSC_USE_COMPLEX)
283     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
284 #else
285     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
286 #endif
287   } else { /* distributed mat input */
288     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
289     aa=(Mat_SeqAIJ*)(mat->A)->data;
290     bb=(Mat_SeqAIJ*)(mat->B)->data;
291     ai=aa->i; aj=aa->j;
292     bi=bb->i; bj=bb->j;
293 #if defined(PETSC_USE_COMPLEX)
294     av=(doublecomplex*)aa->a;
295     bv=(doublecomplex*)bb->a;
296 #else
297     av=aa->a;
298     bv=bb->a;
299 #endif
300     rstart = A->rmap.rstart;
301     nz     = aa->nz + bb->nz;
302     garray = mat->garray;
303 
304     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
305 #if defined(PETSC_USE_COMPLEX)
306       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
307 #else
308       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309 #endif
310     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
311       if (lu->FactPattern == SamePattern_SameRowPerm){
312         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
313         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
314       } else {
315         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
316         lu->options.Fact = SamePattern;
317       }
318     }
319     nz = 0; irow = rstart;
320     for ( i=0; i<m; i++ ) {
321       lu->row[i] = nz;
322       countA = ai[i+1] - ai[i];
323       countB = bi[i+1] - bi[i];
324       ajj = aj + ai[i];  /* ptr to the beginning of this row */
325       bjj = bj + bi[i];
326 
327       /* B part, smaller col index */
328       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
329       jB = 0;
330       for (j=0; j<countB; j++){
331         jcol = garray[bjj[j]];
332         if (jcol > colA_start) {
333           jB = j;
334           break;
335         }
336         lu->col[nz] = jcol;
337         lu->val[nz++] = *bv++;
338         if (j==countB-1) jB = countB;
339       }
340 
341       /* A part */
342       for (j=0; j<countA; j++){
343         lu->col[nz] = rstart + ajj[j];
344         lu->val[nz++] = *av++;
345       }
346 
347       /* B part, larger col index */
348       for (j=jB; j<countB; j++){
349         lu->col[nz] = garray[bjj[j]];
350         lu->val[nz++] = *bv++;
351       }
352     }
353     lu->row[m] = nz;
354 #if defined(PETSC_USE_COMPLEX)
355     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
356 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
357 #else
358     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
359 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
360 #endif
361   }
362   if (lu->options.PrintStat) {
363     ierr = PetscGetTime(&time);CHKERRQ(ierr);
364     time0 = time - time0;
365   }
366 
367   /* Factor the matrix. */
368   PStatInit(&stat);   /* Initialize the statistics variables. */
369 
370   if (lu->MatInputMode == GLOBAL) { /* global mat input */
371 #if defined(PETSC_USE_COMPLEX)
372     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
373                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
374 #else
375     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
376                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
377 #endif
378   } else { /* distributed mat input */
379 #if defined(PETSC_USE_COMPLEX)
380     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
381 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
382     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
383 #else
384     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
385 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
386     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
387 #endif
388   }
389 
390   if (lu->MatInputMode == GLOBAL && size > 1){
391     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
392   }
393 
394   if (lu->options.PrintStat) {
395     if (size > 1){
396       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
397       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
398       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
399       time = time/size; /* average time */
400       if (!rank) {
401         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
402       }
403     } else {
404       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
405     }
406     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
407   }
408   PStatFree(&stat);
409   if (size > 1){
410     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
411     F_diag->assembled = PETSC_TRUE;
412   }
413   (*F)->assembled  = PETSC_TRUE;
414   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
415   PetscFunctionReturn(0);
416 }
417 
418 /* Note the Petsc r and c permutations are ignored */
419 #undef __FUNCT__
420 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
421 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
422 {
423   Mat               B;
424   Mat_SuperLU_DIST  *lu;
425   PetscErrorCode    ierr;
426   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
427   PetscMPIInt       size;
428   superlu_options_t options;
429   PetscTruth        flg;
430   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
431   const char        *prtype[] = {"LargeDiag","NATURAL"};
432   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
433 
434   PetscFunctionBegin;
435   /* Create the factorization matrix */
436   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
437   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
438   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
439   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
440   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
441 
442   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
443   B->ops->solve            = MatSolve_SuperLU_DIST;
444   B->factor                = FACTOR_LU;
445 
446   lu = (Mat_SuperLU_DIST*)(B->spptr);
447 
448   /*   Set the default input options:
449         options.Fact              = DOFACT;
450         options.Equil             = YES;
451         options.ColPerm           = MMD_AT_PLUS_A;
452         options.RowPerm           = LargeDiag;
453         options.ReplaceTinyPivot  = YES;
454         options.ParSymbFact       = NO;
455         options.Trans             = NOTRANS;
456         options.IterRefine        = DOUBLE;
457         options.SolveInitialized  = NO;
458         options.RefineInitialized = NO;
459         options.PrintStat         = YES;
460   */
461   set_default_options_dist(&options);
462 
463   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
464   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
465   /* Default num of process columns and rows */
466   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
467   if (!lu->npcol) lu->npcol = 1;
468   while (lu->npcol > 0) {
469     lu->nprow = PetscMPIIntCast(size/lu->npcol);
470     if (size == lu->nprow * lu->npcol) break;
471     lu->npcol --;
472   }
473 
474   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
475     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
476     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
477     if (size != lu->nprow * lu->npcol)
478       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
479 
480     lu->MatInputMode = DISTRIBUTED;
481     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
482     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
483 
484     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
485     if (!flg) {
486       options.Equil = NO;
487     }
488 
489     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
490     if (flg) {
491       switch (indx) {
492       case 0:
493         options.RowPerm = LargeDiag;
494         break;
495       case 1:
496         options.RowPerm = NOROWPERM;
497         break;
498       }
499     }
500 
501     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
502     if (flg) {
503       switch (indx) {
504       case 0:
505         options.ColPerm = MMD_AT_PLUS_A;
506         break;
507       case 1:
508         options.ColPerm = NATURAL;
509         break;
510       case 2:
511         options.ColPerm = MMD_ATA;
512         break;
513       case 3:
514         options.ColPerm = PARMETIS;
515         break;
516       }
517     }
518 
519     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
520     if (!flg) {
521       options.ReplaceTinyPivot = NO;
522     }
523 
524     options.ParSymbFact = NO;
525     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
526     if (flg){
527 #ifdef PETSC_HAVE_PARMETIS
528       options.ParSymbFact = YES;
529 #else
530       printf("parsymbfact needs PARMETIS");
531 #endif
532     }
533 
534     lu->FactPattern = SamePattern;
535     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
536     if (flg) {
537       switch (indx) {
538       case 0:
539         lu->FactPattern = SamePattern;
540         break;
541       case 1:
542         lu->FactPattern = SamePattern_SameRowPerm;
543         break;
544       }
545     }
546 
547     options.IterRefine = NOREFINE;
548     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
549     if (flg) {
550       options.IterRefine = DOUBLE;
551     }
552 
553     if (PetscLogPrintInfo) {
554       options.PrintStat = YES;
555     } else {
556       options.PrintStat = NO;
557     }
558     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
559                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
560   PetscOptionsEnd();
561 
562   /* Initialize the SuperLU process grid. */
563   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
564 
565   /* Initialize ScalePermstruct and LUstruct. */
566   ScalePermstructInit(M, N, &lu->ScalePermstruct);
567   LUstructInit(M, N, &lu->LUstruct);
568 
569   lu->options             = options;
570   lu->options.Fact        = DOFACT;
571   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
572   *F = B;
573   PetscFunctionReturn(0);
574 }
575 
576 #undef __FUNCT__
577 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
578 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
579   PetscErrorCode   ierr;
580   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
581 
582   PetscFunctionBegin;
583   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
584   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
585   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
591 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
592 {
593   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
594   superlu_options_t options;
595   PetscErrorCode    ierr;
596 
597   PetscFunctionBegin;
598   /* check if matrix is superlu_dist type */
599   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
600 
601   options = lu->options;
602   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
603   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
604   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
605   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
606   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
607   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
608   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
609   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
610   if (options.ColPerm == NATURAL) {
611     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
612   } else if (options.ColPerm == MMD_AT_PLUS_A) {
613     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
614   } else if (options.ColPerm == MMD_ATA) {
615     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
616   } else if (options.ColPerm == PARMETIS) {
617     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
618   } else {
619     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
620   }
621 
622   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);
623 
624   if (lu->FactPattern == SamePattern){
625     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
626   } else {
627     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 #undef __FUNCT__
633 #define __FUNCT__ "MatView_SuperLU_DIST"
634 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
635 {
636   PetscErrorCode    ierr;
637   PetscTruth        iascii;
638   PetscViewerFormat format;
639   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
640 
641   PetscFunctionBegin;
642   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
643 
644   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
645   if (iascii) {
646     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
647     if (format == PETSC_VIEWER_ASCII_INFO) {
648       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
649     }
650   }
651   PetscFunctionReturn(0);
652 }
653 
654 
655 EXTERN_C_BEGIN
656 #undef __FUNCT__
657 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
658 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
659 {
660   /* This routine is only called to convert to MATSUPERLU_DIST */
661   /* from MATSEQAIJ if A has a single process communicator */
662   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
663   PetscErrorCode   ierr;
664   PetscMPIInt      size;
665   MPI_Comm         comm;
666   Mat              B=*newmat;
667   Mat_SuperLU_DIST *lu;
668 
669   PetscFunctionBegin;
670   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
671   if (reuse == MAT_INITIAL_MATRIX) {
672     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
673     lu->MatDuplicate         = B->ops->duplicate;
674     lu->MatView              = B->ops->view;
675     lu->MatAssemblyEnd       = B->ops->assemblyend;
676     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
677     lu->MatDestroy           = B->ops->destroy;
678   } else {
679     lu->MatDuplicate         = A->ops->duplicate;
680     lu->MatView              = A->ops->view;
681     lu->MatAssemblyEnd       = A->ops->assemblyend;
682     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
683     lu->MatDestroy           = A->ops->destroy;
684   }
685   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
686 
687   B->spptr                 = (void*)lu;
688   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
689   B->ops->view             = MatView_SuperLU_DIST;
690   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
691   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
692   B->ops->destroy          = MatDestroy_SuperLU_DIST;
693 
694   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
695   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
696   if (size == 1) {
697     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
698     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
699     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
700     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
701   } else {
702     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
703                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
704     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
705                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
706   }
707   ierr = PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
708   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
709   *newmat = B;
710   PetscFunctionReturn(0);
711 }
712 EXTERN_C_END
713 
714 #undef __FUNCT__
715 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
716 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
717   PetscErrorCode   ierr;
718   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
719 
720   PetscFunctionBegin;
721   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
722   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
723   PetscFunctionReturn(0);
724 }
725 
726 /*MC
727   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
728   via the external package SuperLU_DIST.
729 
730   If SuperLU_DIST is installed (see the manual for
731   instructions on how to declare the existence of external packages),
732   a matrix type can be constructed which invokes SuperLU_DIST solvers.
733   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
734   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
735   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
736 
737   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
738   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
739   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
740   for communicators controlling multiple processes.  It is recommended that you call both of
741   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
742   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
743   without data copy; but this MUST be called AFTER the matrix values are set.
744 
745 
746 
747   Options Database Keys:
748 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
749 . -mat_superlu_dist_r <n> - number of rows in processor partition
750 . -mat_superlu_dist_c <n> - number of columns in processor partition
751 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
752 . -mat_superlu_dist_equil - equilibrate the matrix
753 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
754 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
755 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
756 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
757 . -mat_superlu_dist_iterrefine - use iterative refinement
758 - -mat_superlu_dist_statprint - print factorization information
759 
760    Level: beginner
761 
762 .seealso: PCLU
763 M*/
764 
765 EXTERN_C_BEGIN
766 #undef __FUNCT__
767 #define __FUNCT__ "MatCreate_SuperLU_DIST"
768 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
769 {
770   PetscErrorCode ierr;
771   PetscMPIInt    size;
772 
773   PetscFunctionBegin;
774   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
775   if (size == 1) {
776     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
777   } else {
778     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
779     /*  A_diag = 0x0 ???  -- do we need it?
780     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
781     ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
782     */
783   }
784   ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
785   PetscFunctionReturn(0);
786 }
787 EXTERN_C_END
788 
789