xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision ce0a2cd1da0658c2b28aad1be2e2c8e41567bece)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.1 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
/* How the matrix is handed to SuperLU_DIST: GLOBAL = every process holds a
   full replicated copy; DISTRIBUTED = each process holds its local rows. */
typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
22 
/* Private context attached to a MATSUPERLU_DIST matrix (stored in Mat.spptr). */
typedef struct {
  int_t                   nprow,npcol,*row,*col;   /* process-grid shape; row/col index arrays handed to SuperLU_DIST */
  gridinfo_t              grid;                    /* SuperLU_DIST process grid */
  superlu_options_t       options;                 /* SuperLU_DIST solver options */
  SuperMatrix             A_sup;                   /* matrix in SuperLU_DIST format (SLU_NC or SLU_NR_loc) */
  ScalePermstruct_t       ScalePermstruct;         /* scalings and row/column permutations */
  LUstruct_t              LUstruct;                /* distributed L and U factors */
  int                     StatPrint;               /* NOTE(review): not referenced in this file; options.PrintStat is used instead — confirm before removing */
  int                     MatInputMode;            /* GLOBAL or DISTRIBUTED (SuperLU_MatInputMode) */
  SOLVEstruct_t           SOLVEstruct;             /* solve-phase data for the distributed driver */
  fact_t                  FactPattern;             /* SamePattern or SamePattern_SameRowPerm for refactorizations */
  MPI_Comm                comm_superlu;            /* private dup of the matrix communicator, freed in MatDestroy */
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;                    /* nonzero values, SuperLU_DIST complex type */
#else
  double                  *val;                    /* nonzero values */
#endif

  /* A few function pointers for inheritance: the original AIJ methods saved
     at conversion time so they can be restored when converting back */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;
51 
52 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
53 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
54 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
55 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
56 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
57 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
58 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
59 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
60 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *);
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
/*
   Converts a MATSUPERLU_DIST matrix back to its parent AIJ type by restoring
   the AIJ function pointers saved in the Mat_SuperLU_DIST context and then
   freeing that context.  With MAT_INITIAL_MATRIX a duplicate of A is made
   first; with MAT_REUSE_MATRIX the conversion is done in place on *newmat.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode   ierr;
  Mat              B=*newmat;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;  /* holds the saved AIJ methods */

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* Reset the original function pointers */
  B->ops->duplicate        = lu->MatDuplicate;
  B->ops->view             = lu->MatView;
  B->ops->assemblyend      = lu->MatAssemblyEnd;
  B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
  B->ops->destroy          = lu->MatDestroy;
  /* NOTE(review): A->spptr is cleared even in the MAT_INITIAL_MATRIX path,
     so A is stripped of its context while keeping the superlu_dist ops —
     confirm callers only use MAT_REUSE_MATRIX here */
  ierr     = PetscFree(lu);CHKERRQ(ierr);
  A->spptr = PETSC_NULL;

  /* Remove the composed conversion routines; both the seq and mpi names are
     cleared regardless of the communicator size (harmless if absent) */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
93 EXTERN_C_END
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
/*
   Destroys a MATSUPERLU_DIST matrix: releases the SuperLU_DIST factorization
   storage when this object owns it (CleanUpSuperLU_Dist), converts the matrix
   back to its parent AIJ type (which restores the AIJ destroy routine and
   frees the context), and finally invokes the restored destroy.
*/
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      /* solve-phase data exists only if a solve was performed */
      if ( lu->options.SolveInitialized ) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    /* free the L/U factors, permutations/scalings, then the LU skeleton */
    Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }

  /* Convert back to (seq|mpi)aij; this frees lu and restores A->ops->destroy */
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSolve_SuperLU_DIST"
140 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
141 {
142   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
143   PetscErrorCode   ierr;
144   PetscMPIInt      size;
145   PetscInt         m=A->rmap.N, N=A->cmap.N;
146   SuperLUStat_t    stat;
147   double           berr[1];
148   PetscScalar      *bptr;
149   PetscInt         info, nrhs=1;
150   Vec              x_seq;
151   IS               iden;
152   VecScatter       scat;
153 
154   PetscFunctionBegin;
155   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
156   if (size > 1) {
157     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
158       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
159       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
160       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
161       ierr = ISDestroy(iden);CHKERRQ(ierr);
162 
163       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
164       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
165       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
166     } else { /* distributed mat input */
167       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
168       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
169     }
170   } else { /* size == 1 */
171     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
172     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
173   }
174 
175   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
176 
177   PStatInit(&stat);        /* Initialize the statistics variables. */
178   if (lu->MatInputMode == GLOBAL) {
179 #if defined(PETSC_USE_COMPLEX)
180     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
181                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
182 #else
183     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
184                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
185 #endif
186   } else { /* distributed mat input */
187 #if defined(PETSC_USE_COMPLEX)
188     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
189 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
190     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
191 #else
192     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
193 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
194     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
195 #endif
196   }
197   if (lu->options.PrintStat) {
198      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
199   }
200   PStatFree(&stat);
201 
202   if (size > 1) {
203     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
204       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
205       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
206       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
207       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
208       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
209     } else {
210       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
211     }
212   } else {
213     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
220 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
221 {
222   Mat              *tseq,A_seq = PETSC_NULL;
223   Mat_SeqAIJ       *aa,*bb;
224   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225   PetscErrorCode   ierr;
226   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
227                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
228   PetscMPIInt      size,rank;
229   SuperLUStat_t    stat;
230   double           *berr=0;
231   IS               isrow;
232   PetscLogDouble   time0,time,time_min,time_max;
233   Mat              F_diag=PETSC_NULL;
234 #if defined(PETSC_USE_COMPLEX)
235   doublecomplex    *av, *bv;
236 #else
237   double           *av, *bv;
238 #endif
239 
240   PetscFunctionBegin;
241   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
242   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
243 
244   if (lu->options.PrintStat) { /* collect time for mat conversion */
245     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
246     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
247   }
248 
249   if (lu->MatInputMode == GLOBAL) { /* global mat input */
250     if (size > 1) { /* convert mpi A to seq mat A */
251       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
252       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
253       ierr = ISDestroy(isrow);CHKERRQ(ierr);
254 
255       A_seq = *tseq;
256       ierr = PetscFree(tseq);CHKERRQ(ierr);
257       aa =  (Mat_SeqAIJ*)A_seq->data;
258     } else {
259       aa =  (Mat_SeqAIJ*)A->data;
260     }
261 
262     /* Convert Petsc NR matrix to SuperLU_DIST NC.
263        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
264     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
265       if (lu->FactPattern == SamePattern_SameRowPerm){
266         Destroy_CompCol_Matrix_dist(&lu->A_sup);
267         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
268         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
269       } else {
270         Destroy_CompCol_Matrix_dist(&lu->A_sup);
271         Destroy_LU(N, &lu->grid, &lu->LUstruct);
272         lu->options.Fact = SamePattern;
273       }
274     }
275 #if defined(PETSC_USE_COMPLEX)
276     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
277 #else
278     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
279 #endif
280 
281     /* Create compressed column matrix A_sup. */
282 #if defined(PETSC_USE_COMPLEX)
283     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
284 #else
285     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
286 #endif
287   } else { /* distributed mat input */
288     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
289     aa=(Mat_SeqAIJ*)(mat->A)->data;
290     bb=(Mat_SeqAIJ*)(mat->B)->data;
291     ai=aa->i; aj=aa->j;
292     bi=bb->i; bj=bb->j;
293 #if defined(PETSC_USE_COMPLEX)
294     av=(doublecomplex*)aa->a;
295     bv=(doublecomplex*)bb->a;
296 #else
297     av=aa->a;
298     bv=bb->a;
299 #endif
300     rstart = A->rmap.rstart;
301     nz     = aa->nz + bb->nz;
302     garray = mat->garray;
303 
304     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
305 #if defined(PETSC_USE_COMPLEX)
306       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
307 #else
308       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309 #endif
310     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
311       if (lu->FactPattern == SamePattern_SameRowPerm){
312         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
313         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
314       } else {
315         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
316         lu->options.Fact = SamePattern;
317       }
318     }
319     nz = 0; irow = rstart;
320     for ( i=0; i<m; i++ ) {
321       lu->row[i] = nz;
322       countA = ai[i+1] - ai[i];
323       countB = bi[i+1] - bi[i];
324       ajj = aj + ai[i];  /* ptr to the beginning of this row */
325       bjj = bj + bi[i];
326 
327       /* B part, smaller col index */
328       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
329       jB = 0;
330       for (j=0; j<countB; j++){
331         jcol = garray[bjj[j]];
332         if (jcol > colA_start) {
333           jB = j;
334           break;
335         }
336         lu->col[nz] = jcol;
337         lu->val[nz++] = *bv++;
338         if (j==countB-1) jB = countB;
339       }
340 
341       /* A part */
342       for (j=0; j<countA; j++){
343         lu->col[nz] = rstart + ajj[j];
344         lu->val[nz++] = *av++;
345       }
346 
347       /* B part, larger col index */
348       for (j=jB; j<countB; j++){
349         lu->col[nz] = garray[bjj[j]];
350         lu->val[nz++] = *bv++;
351       }
352     }
353     lu->row[m] = nz;
354 #if defined(PETSC_USE_COMPLEX)
355     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
356 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
357 #else
358     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
359 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
360 #endif
361   }
362   if (lu->options.PrintStat) {
363     ierr = PetscGetTime(&time);CHKERRQ(ierr);
364     time0 = time - time0;
365   }
366 
367   /* Factor the matrix. */
368   PStatInit(&stat);   /* Initialize the statistics variables. */
369 
370   if (lu->MatInputMode == GLOBAL) { /* global mat input */
371 #if defined(PETSC_USE_COMPLEX)
372     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
373                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
374 #else
375     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
376                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
377 #endif
378   } else { /* distributed mat input */
379 #if defined(PETSC_USE_COMPLEX)
380     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
381 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
382     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
383 #else
384     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
385 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
386     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
387 #endif
388   }
389 
390   if (lu->MatInputMode == GLOBAL && size > 1){
391     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
392   }
393 
394   if (lu->options.PrintStat) {
395     if (size > 1){
396       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
397       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
398       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
399       time = time/size; /* average time */
400       if (!rank) {
401         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
402       }
403     } else {
404       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
405     }
406     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
407   }
408   PStatFree(&stat);
409   if (size > 1){
410     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
411     F_diag->assembled = PETSC_TRUE;
412   }
413   (*F)->assembled  = PETSC_TRUE;
414   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
415   PetscFunctionReturn(0);
416 }
417 
418 /* Note the Petsc r and c permutations are ignored */
419 #undef __FUNCT__
420 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
421 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
422 {
423   Mat               B;
424   Mat_SuperLU_DIST  *lu;
425   PetscErrorCode    ierr;
426   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
427   PetscMPIInt       size;
428   superlu_options_t options;
429   PetscTruth        flg;
430   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
431   const char        *prtype[] = {"LargeDiag","NATURAL"};
432   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
433 
434   PetscFunctionBegin;
435   /* Create the factorization matrix */
436   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
437   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
438   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
439   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
440   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
441 
442   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
443   B->ops->solve            = MatSolve_SuperLU_DIST;
444   B->factor                = FACTOR_LU;
445 
446   lu = (Mat_SuperLU_DIST*)(B->spptr);
447 
448   /*   Set the default input options:
449         options.Fact              = DOFACT;
450         options.Equil             = YES;
451         options.ColPerm           = MMD_AT_PLUS_A;
452         options.RowPerm           = LargeDiag;
453         options.ReplaceTinyPivot  = YES;
454         options.ParSymbFact       = NO;
455         options.Trans             = NOTRANS;
456         options.IterRefine        = DOUBLE;
457         options.SolveInitialized  = NO;
458         options.RefineInitialized = NO;
459         options.PrintStat         = YES;
460   */
461   set_default_options_dist(&options);
462 
463   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
464   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
465   /* Default num of process columns and rows */
466   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
467   if (!lu->npcol) lu->npcol = 1;
468   while (lu->npcol > 0) {
469     lu->nprow = PetscMPIIntCast(size/lu->npcol);
470     if (size == lu->nprow * lu->npcol) break;
471     lu->npcol --;
472   }
473 
474   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
475     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
476     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
477     if (size != lu->nprow * lu->npcol)
478       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
479 
480     lu->MatInputMode = DISTRIBUTED;
481     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
482     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
483 
484     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
485     if (!flg) {
486       options.Equil = NO;
487     }
488 
489     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
490     if (flg) {
491       switch (indx) {
492       case 0:
493         options.RowPerm = LargeDiag;
494         break;
495       case 1:
496         options.RowPerm = NOROWPERM;
497         break;
498       }
499     }
500 
501     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr);
502     if (flg) {
503       switch (indx) {
504       case 0:
505         options.ColPerm = MMD_AT_PLUS_A;
506         break;
507       case 1:
508         options.ColPerm = NATURAL;
509         break;
510       case 2:
511         options.ColPerm = MMD_ATA;
512         break;
513       }
514     }
515 
516     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
517     if (!flg) {
518       options.ReplaceTinyPivot = NO;
519     }
520 
521     options.ParSymbFact = NO;
522     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
523     if (flg){
524 #ifdef PETSC_HAVE_PARMETIS
525       options.ParSymbFact = YES;
526       options.ColPerm     = PARMETIS;
527 #else
528       printf("parsymbfact needs PARMETIS");
529 #endif
530     }
531 
532     lu->FactPattern = SamePattern;
533     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
534     if (flg) {
535       switch (indx) {
536       case 0:
537         lu->FactPattern = SamePattern;
538         break;
539       case 1:
540         lu->FactPattern = SamePattern_SameRowPerm;
541         break;
542       }
543     }
544 
545     options.IterRefine = NOREFINE;
546     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
547     if (flg) {
548       options.IterRefine = DOUBLE;
549     }
550 
551     if (PetscLogPrintInfo) {
552       options.PrintStat = YES;
553     } else {
554       options.PrintStat = NO;
555     }
556     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
557                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
558   PetscOptionsEnd();
559 
560   /* Initialize the SuperLU process grid. */
561   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
562 
563   /* Initialize ScalePermstruct and LUstruct. */
564   ScalePermstructInit(M, N, &lu->ScalePermstruct);
565   LUstructInit(M, N, &lu->LUstruct);
566 
567   lu->options             = options;
568   lu->options.Fact        = DOFACT;
569   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
570   *F = B;
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
576 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
577   PetscErrorCode   ierr;
578   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
579 
580   PetscFunctionBegin;
581   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
582   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
583   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
584   PetscFunctionReturn(0);
585 }
586 
587 #undef __FUNCT__
588 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
589 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
590 {
591   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
592   superlu_options_t options;
593   PetscErrorCode    ierr;
594 
595   PetscFunctionBegin;
596   /* check if matrix is superlu_dist type */
597   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
598 
599   options = lu->options;
600   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
601   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
602   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
603   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
604   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
605   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
606   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
607   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
608   if (options.ColPerm == NATURAL) {
609     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
610   } else if (options.ColPerm == MMD_AT_PLUS_A) {
611     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
612   } else if (options.ColPerm == MMD_ATA) {
613     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
614   } else {
615     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
616   }
617 
618   if (lu->FactPattern == SamePattern){
619     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
620   } else {
621     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
622   }
623   PetscFunctionReturn(0);
624 }
625 
626 #undef __FUNCT__
627 #define __FUNCT__ "MatView_SuperLU_DIST"
628 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
629 {
630   PetscErrorCode    ierr;
631   PetscTruth        iascii;
632   PetscViewerFormat format;
633   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
634 
635   PetscFunctionBegin;
636   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
637 
638   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
639   if (iascii) {
640     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
641     if (format == PETSC_VIEWER_ASCII_INFO) {
642       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
643     }
644   }
645   PetscFunctionReturn(0);
646 }
647 
648 
649 EXTERN_C_BEGIN
650 #undef __FUNCT__
651 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
652 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
653 {
654   /* This routine is only called to convert to MATSUPERLU_DIST */
655   /* from MATSEQAIJ if A has a single process communicator */
656   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
657   PetscErrorCode   ierr;
658   PetscMPIInt      size;
659   MPI_Comm         comm;
660   Mat              B=*newmat;
661   Mat_SuperLU_DIST *lu;
662 
663   PetscFunctionBegin;
664   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
665   if (reuse == MAT_INITIAL_MATRIX) {
666     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
667     lu->MatDuplicate         = B->ops->duplicate;
668     lu->MatView              = B->ops->view;
669     lu->MatAssemblyEnd       = B->ops->assemblyend;
670     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
671     lu->MatDestroy           = B->ops->destroy;
672   } else {
673     lu->MatDuplicate         = A->ops->duplicate;
674     lu->MatView              = A->ops->view;
675     lu->MatAssemblyEnd       = A->ops->assemblyend;
676     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
677     lu->MatDestroy           = A->ops->destroy;
678   }
679   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
680 
681   B->spptr                 = (void*)lu;
682   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
683   B->ops->view             = MatView_SuperLU_DIST;
684   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
685   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
686   B->ops->destroy          = MatDestroy_SuperLU_DIST;
687 
688   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
689   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
690   if (size == 1) {
691     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
692     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
693     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
694     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
695   } else {
696     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
697                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
698     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
699                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
700   }
701   ierr = PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
702   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
703   *newmat = B;
704   PetscFunctionReturn(0);
705 }
706 EXTERN_C_END
707 
708 #undef __FUNCT__
709 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
/*
   Duplicates a MATSUPERLU_DIST matrix by invoking the inherited AIJ duplicate
   and then copying the Mat_SuperLU_DIST context byte-for-byte into the new
   matrix's spptr.
   NOTE(review): this is a shallow copy — grid, LUstruct, comm_superlu and the
   val/row/col pointers end up shared with A, and (*M)->spptr is assumed to
   already point at a Mat_SuperLU_DIST-sized allocation; verify both.
*/
PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

  PetscFunctionBegin;
  ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
  ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
719 
720 /*MC
721   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
722   via the external package SuperLU_DIST.
723 
724   If SuperLU_DIST is installed (see the manual for
725   instructions on how to declare the existence of external packages),
726   a matrix type can be constructed which invokes SuperLU_DIST solvers.
727   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
728   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
729   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
730 
731   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
732   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
733   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
734   for communicators controlling multiple processes.  It is recommended that you call both of
735   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
736   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
737   without data copy; but this MUST be called AFTER the matrix values are set.
738 
739 
740 
741   Options Database Keys:
742 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
743 . -mat_superlu_dist_r <n> - number of rows in processor partition
744 . -mat_superlu_dist_c <n> - number of columns in processor partition
745 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
746 . -mat_superlu_dist_equil - equilibrate the matrix
747 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
748 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
749 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
750 . -mat_superlu_dist_fact <SamePattern> - sparsity pattern to reuse on repeated factorizations; choose one of SamePattern, SamePattern_SameRowPerm
751 . -mat_superlu_dist_iterrefine - use iterative refinement
752 - -mat_superlu_dist_statprint - print factorization information
753 
754    Level: beginner
755 
756 .seealso: PCLU
757 M*/
758 
759 EXTERN_C_BEGIN
760 #undef __FUNCT__
761 #define __FUNCT__ "MatCreate_SuperLU_DIST"
762 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
763 {
764   PetscErrorCode ierr;
765   PetscMPIInt    size;
766 
767   PetscFunctionBegin;
768   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
769   if (size == 1) {
770     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
771   } else {
772     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
773     /*  A_diag = 0x0 ???  -- do we need it?
774     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
775     ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
776     */
777   }
778   ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
779   PetscFunctionReturn(0);
780 }
781 EXTERN_C_END
782 
783