xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision a8cc4164efbc8c06ea4f125838ee2896be8a9fc8)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.2 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
22 
23 typedef struct {
24   int_t                   nprow,npcol,*row,*col;
25   gridinfo_t              grid;
26   superlu_options_t       options;
27   SuperMatrix             A_sup;
28   ScalePermstruct_t       ScalePermstruct;
29   LUstruct_t              LUstruct;
30   int                     StatPrint;
31   int                     MatInputMode;
32   SOLVEstruct_t           SOLVEstruct;
33   fact_t                  FactPattern;
34   MPI_Comm                comm_superlu;
35 #if defined(PETSC_USE_COMPLEX)
36   doublecomplex           *val;
37 #else
38   double                  *val;
39 #endif
40 
41   /* A few function pointers for inheritance */
42   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
43   PetscErrorCode (*MatView)(Mat,PetscViewer);
44   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
45   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
46   PetscErrorCode (*MatDestroy)(Mat);
47 
48   /* Flag to clean up (non-global) SuperLU objects during Destroy */
49   PetscTruth CleanUpSuperLU_Dist;
50 } Mat_SuperLU_DIST;
51 
52 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
53 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
54 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
55 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
56 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
57 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
58 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
59 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
60 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *);
61 
62 EXTERN_C_BEGIN
63 #undef __FUNCT__
64 #define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
65 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
66 {
67   PetscErrorCode   ierr;
68   Mat              B=*newmat;
69   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
70 
71   PetscFunctionBegin;
72   if (reuse == MAT_INITIAL_MATRIX) {
73     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
74   }
75   /* Reset the original function pointers */
76   B->ops->duplicate        = lu->MatDuplicate;
77   B->ops->view             = lu->MatView;
78   B->ops->assemblyend      = lu->MatAssemblyEnd;
79   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
80   B->ops->destroy          = lu->MatDestroy;
81   ierr     = PetscFree(lu);CHKERRQ(ierr);
82   A->spptr = PETSC_NULL;
83 
84   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
85   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
86   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
87   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
88 
89   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
90   *newmat = B;
91   PetscFunctionReturn(0);
92 }
93 EXTERN_C_END
94 
95 #undef __FUNCT__
96 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
97 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
98 {
99   PetscErrorCode   ierr;
100   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
101 
102   PetscFunctionBegin;
103   if (lu->CleanUpSuperLU_Dist) {
104     /* Deallocate SuperLU_DIST storage */
105     if (lu->MatInputMode == GLOBAL) {
106       Destroy_CompCol_Matrix_dist(&lu->A_sup);
107     } else {
108       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
109       if ( lu->options.SolveInitialized ) {
110 #if defined(PETSC_USE_COMPLEX)
111         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
112 #else
113         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
114 #endif
115       }
116     }
117     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
118     ScalePermstructFree(&lu->ScalePermstruct);
119     LUstructFree(&lu->LUstruct);
120 
121     /* Release the SuperLU_DIST process grid. */
122     superlu_gridexit(&lu->grid);
123 
124     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
125   }
126 
127   ierr = MatConvert_SuperLU_DIST_AIJ(A,MATAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
128   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSolve_SuperLU_DIST"
134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
135 {
136   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
137   PetscErrorCode   ierr;
138   PetscMPIInt      size;
139   PetscInt         m=A->rmap.N, N=A->cmap.N;
140   SuperLUStat_t    stat;
141   double           berr[1];
142   PetscScalar      *bptr;
143   PetscInt         info, nrhs=1;
144   Vec              x_seq;
145   IS               iden;
146   VecScatter       scat;
147 
148   PetscFunctionBegin;
149   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
150   if (size > 1) {
151     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
152       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
153       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
154       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
155       ierr = ISDestroy(iden);CHKERRQ(ierr);
156 
157       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
158       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
159       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
160     } else { /* distributed mat input */
161       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
162       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
163     }
164   } else { /* size == 1 */
165     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
166     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
167   }
168 
169   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
170 
171   PStatInit(&stat);        /* Initialize the statistics variables. */
172   if (lu->MatInputMode == GLOBAL) {
173 #if defined(PETSC_USE_COMPLEX)
174     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
175                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
176 #else
177     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
178                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
179 #endif
180   } else { /* distributed mat input */
181 #if defined(PETSC_USE_COMPLEX)
182     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
183 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
184     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
185 #else
186     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
187 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
188     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
189 #endif
190   }
191   if (lu->options.PrintStat) {
192      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
193   }
194   PStatFree(&stat);
195 
196   if (size > 1) {
197     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
198       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
199       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
200       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
201       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
202       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
203     } else {
204       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
205     }
206   } else {
207     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
208   }
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
215 {
216   Mat              *tseq,A_seq = PETSC_NULL;
217   Mat_SeqAIJ       *aa,*bb;
218   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
219   PetscErrorCode   ierr;
220   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
221                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
222   PetscMPIInt      size,rank;
223   SuperLUStat_t    stat;
224   double           *berr=0;
225   IS               isrow;
226   PetscLogDouble   time0,time,time_min,time_max;
227   Mat              F_diag=PETSC_NULL;
228 #if defined(PETSC_USE_COMPLEX)
229   doublecomplex    *av, *bv;
230 #else
231   double           *av, *bv;
232 #endif
233 
234   PetscFunctionBegin;
235   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
236   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
237 
238   if (lu->options.PrintStat) { /* collect time for mat conversion */
239     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
240     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
241   }
242 
243   if (lu->MatInputMode == GLOBAL) { /* global mat input */
244     if (size > 1) { /* convert mpi A to seq mat A */
245       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
246       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
247       ierr = ISDestroy(isrow);CHKERRQ(ierr);
248 
249       A_seq = *tseq;
250       ierr = PetscFree(tseq);CHKERRQ(ierr);
251       aa =  (Mat_SeqAIJ*)A_seq->data;
252     } else {
253       aa =  (Mat_SeqAIJ*)A->data;
254     }
255 
256     /* Convert Petsc NR matrix to SuperLU_DIST NC.
257        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
258     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
259       if (lu->FactPattern == SamePattern_SameRowPerm){
260         Destroy_CompCol_Matrix_dist(&lu->A_sup);
261         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
262         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
263       } else {
264         Destroy_CompCol_Matrix_dist(&lu->A_sup);
265         Destroy_LU(N, &lu->grid, &lu->LUstruct);
266         lu->options.Fact = SamePattern;
267       }
268     }
269 #if defined(PETSC_USE_COMPLEX)
270     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
271 #else
272     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
273 #endif
274 
275     /* Create compressed column matrix A_sup. */
276 #if defined(PETSC_USE_COMPLEX)
277     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
278 #else
279     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
280 #endif
281   } else { /* distributed mat input */
282     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
283     aa=(Mat_SeqAIJ*)(mat->A)->data;
284     bb=(Mat_SeqAIJ*)(mat->B)->data;
285     ai=aa->i; aj=aa->j;
286     bi=bb->i; bj=bb->j;
287 #if defined(PETSC_USE_COMPLEX)
288     av=(doublecomplex*)aa->a;
289     bv=(doublecomplex*)bb->a;
290 #else
291     av=aa->a;
292     bv=bb->a;
293 #endif
294     rstart = A->rmap.rstart;
295     nz     = aa->nz + bb->nz;
296     garray = mat->garray;
297 
298     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
299 #if defined(PETSC_USE_COMPLEX)
300       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #else
302       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
303 #endif
304     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
305       if (lu->FactPattern == SamePattern_SameRowPerm){
306         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
307         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
308       } else {
309         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
310         lu->options.Fact = SamePattern;
311       }
312     }
313     nz = 0; irow = rstart;
314     for ( i=0; i<m; i++ ) {
315       lu->row[i] = nz;
316       countA = ai[i+1] - ai[i];
317       countB = bi[i+1] - bi[i];
318       ajj = aj + ai[i];  /* ptr to the beginning of this row */
319       bjj = bj + bi[i];
320 
321       /* B part, smaller col index */
322       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
323       jB = 0;
324       for (j=0; j<countB; j++){
325         jcol = garray[bjj[j]];
326         if (jcol > colA_start) {
327           jB = j;
328           break;
329         }
330         lu->col[nz] = jcol;
331         lu->val[nz++] = *bv++;
332         if (j==countB-1) jB = countB;
333       }
334 
335       /* A part */
336       for (j=0; j<countA; j++){
337         lu->col[nz] = rstart + ajj[j];
338         lu->val[nz++] = *av++;
339       }
340 
341       /* B part, larger col index */
342       for (j=jB; j<countB; j++){
343         lu->col[nz] = garray[bjj[j]];
344         lu->val[nz++] = *bv++;
345       }
346     }
347     lu->row[m] = nz;
348 #if defined(PETSC_USE_COMPLEX)
349     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
350 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
351 #else
352     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
353 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
354 #endif
355   }
356   if (lu->options.PrintStat) {
357     ierr = PetscGetTime(&time);CHKERRQ(ierr);
358     time0 = time - time0;
359   }
360 
361   /* Factor the matrix. */
362   PStatInit(&stat);   /* Initialize the statistics variables. */
363 
364   if (lu->MatInputMode == GLOBAL) { /* global mat input */
365 #if defined(PETSC_USE_COMPLEX)
366     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
367                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
368 #else
369     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
370                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
371 #endif
372   } else { /* distributed mat input */
373 #if defined(PETSC_USE_COMPLEX)
374     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
375 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
376     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
377 #else
378     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
379 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
380     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
381 #endif
382   }
383 
384   if (lu->MatInputMode == GLOBAL && size > 1){
385     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
386   }
387 
388   if (lu->options.PrintStat) {
389     if (size > 1){
390       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
391       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
392       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
393       time = time/size; /* average time */
394       if (!rank) {
395         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
396       }
397     } else {
398       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
399     }
400     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
401   }
402   PStatFree(&stat);
403   if (size > 1){
404     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
405     F_diag->assembled = PETSC_TRUE;
406   }
407   (*F)->assembled  = PETSC_TRUE;
408   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
409   PetscFunctionReturn(0);
410 }
411 
412 /* Note the Petsc r and c permutations are ignored */
413 #undef __FUNCT__
414 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
415 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
416 {
417   Mat               B;
418   Mat_SuperLU_DIST  *lu;
419   PetscErrorCode    ierr;
420   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
421   PetscMPIInt       size;
422   superlu_options_t options;
423   PetscTruth        flg;
424   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","PARMETIS"};
425   const char        *prtype[] = {"LargeDiag","NATURAL"};
426   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
427 
428   PetscFunctionBegin;
429   /* Create the factorization matrix */
430   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
431   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
432   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
433   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
434   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
435 
436   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
437   B->ops->solve            = MatSolve_SuperLU_DIST;
438   B->factor                = FACTOR_LU;
439 
440   lu = (Mat_SuperLU_DIST*)(B->spptr);
441 
442   /* Set the default input options:
443      options.Fact              = DOFACT;
444      options.Equil             = YES;
445      options.ParSymbFact       = NO;
446      options.ColPerm           = MMD_AT_PLUS_A;
447      options.RowPerm           = LargeDiag;
448      options.ReplaceTinyPivot  = YES;
449      options.IterRefine        = DOUBLE;
450      options.Trans             = NOTRANS;
451      options.SolveInitialized  = NO;
452      options.RefineInitialized = NO;
453      options.PrintStat         = YES;
454   */
455   set_default_options_dist(&options);
456 
457   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
458   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
459   /* Default num of process columns and rows */
460   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
461   if (!lu->npcol) lu->npcol = 1;
462   while (lu->npcol > 0) {
463     lu->nprow = PetscMPIIntCast(size/lu->npcol);
464     if (size == lu->nprow * lu->npcol) break;
465     lu->npcol --;
466   }
467 
468   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
469     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
470     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
471     if (size != lu->nprow * lu->npcol)
472       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
473 
474     lu->MatInputMode = DISTRIBUTED;
475     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
476     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
477 
478     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
479     if (!flg) {
480       options.Equil = NO;
481     }
482 
483     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
484     if (flg) {
485       switch (indx) {
486       case 0:
487         options.RowPerm = LargeDiag;
488         break;
489       case 1:
490         options.RowPerm = NOROWPERM;
491         break;
492       }
493     }
494 
495     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,4,pctype[0],&indx,&flg);CHKERRQ(ierr);
496     if (flg) {
497       switch (indx) {
498       case 0:
499         options.ColPerm = MMD_AT_PLUS_A;
500         break;
501       case 1:
502         options.ColPerm = NATURAL;
503         break;
504       case 2:
505         options.ColPerm = MMD_ATA;
506         break;
507       case 3:
508         options.ColPerm = PARMETIS;
509         break;
510       }
511     }
512 
513     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
514     if (!flg) {
515       options.ReplaceTinyPivot = NO;
516     }
517 
518     options.ParSymbFact = NO;
519     ierr = PetscOptionsTruth("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
520     if (flg){
521 #ifdef PETSC_HAVE_PARMETIS
522       options.ParSymbFact = YES;
523       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
524 #else
525       printf("parsymbfact needs PARMETIS");
526 #endif
527     }
528 
529     lu->FactPattern = SamePattern;
530     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
531     if (flg) {
532       switch (indx) {
533       case 0:
534         lu->FactPattern = SamePattern;
535         break;
536       case 1:
537         lu->FactPattern = SamePattern_SameRowPerm;
538         break;
539       }
540     }
541 
542     options.IterRefine = NOREFINE;
543     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
544     if (flg) {
545       options.IterRefine = DOUBLE;
546     }
547 
548     if (PetscLogPrintInfo) {
549       options.PrintStat = YES;
550     } else {
551       options.PrintStat = NO;
552     }
553     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
554                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
555   PetscOptionsEnd();
556 
557   /* Initialize the SuperLU process grid. */
558   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
559 
560   /* Initialize ScalePermstruct and LUstruct. */
561   ScalePermstructInit(M, N, &lu->ScalePermstruct);
562   LUstructInit(M, N, &lu->LUstruct);
563 
564   lu->options             = options;
565   lu->options.Fact        = DOFACT;
566   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
567   *F = B;
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
573 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
574   PetscErrorCode   ierr;
575   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
576 
577   PetscFunctionBegin;
578   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
579   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
580   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
586 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
587 {
588   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
589   superlu_options_t options;
590   PetscErrorCode    ierr;
591 
592   PetscFunctionBegin;
593   /* check if matrix is superlu_dist type */
594   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
595 
596   options = lu->options;
597   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
598   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
599   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
600   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
601   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
602   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
603   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
604   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
605   if (options.ColPerm == NATURAL) {
606     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
607   } else if (options.ColPerm == MMD_AT_PLUS_A) {
608     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
609   } else if (options.ColPerm == MMD_ATA) {
610     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
611   } else if (options.ColPerm == PARMETIS) {
612     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
613   } else {
614     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
615   }
616 
617   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscTruths[options.ParSymbFact != NO]);CHKERRQ(ierr);
618 
619   if (lu->FactPattern == SamePattern){
620     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
621   } else {
622     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
623   }
624   PetscFunctionReturn(0);
625 }
626 
627 #undef __FUNCT__
628 #define __FUNCT__ "MatView_SuperLU_DIST"
629 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
630 {
631   PetscErrorCode    ierr;
632   PetscTruth        iascii;
633   PetscViewerFormat format;
634   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
635 
636   PetscFunctionBegin;
637   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
638 
639   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
640   if (iascii) {
641     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
642     if (format == PETSC_VIEWER_ASCII_INFO) {
643       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
644     }
645   }
646   PetscFunctionReturn(0);
647 }
648 
649 
650 EXTERN_C_BEGIN
651 #undef __FUNCT__
652 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
653 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
654 {
655   /* This routine is only called to convert to MATSUPERLU_DIST */
656   /* from MATSEQAIJ if A has a single process communicator */
657   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
658   PetscErrorCode   ierr;
659   PetscMPIInt      size;
660   MPI_Comm         comm;
661   Mat              B=*newmat;
662   Mat_SuperLU_DIST *lu;
663 
664   PetscFunctionBegin;
665   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
666   if (reuse == MAT_INITIAL_MATRIX) {
667     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
668     lu->MatDuplicate         = B->ops->duplicate;
669     lu->MatView              = B->ops->view;
670     lu->MatAssemblyEnd       = B->ops->assemblyend;
671     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
672     lu->MatDestroy           = B->ops->destroy;
673   } else {
674     lu->MatDuplicate         = A->ops->duplicate;
675     lu->MatView              = A->ops->view;
676     lu->MatAssemblyEnd       = A->ops->assemblyend;
677     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
678     lu->MatDestroy           = A->ops->destroy;
679   }
680   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
681 
682   B->spptr                 = (void*)lu;
683   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
684   B->ops->view             = MatView_SuperLU_DIST;
685   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
686   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
687   B->ops->destroy          = MatDestroy_SuperLU_DIST;
688 
689   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
690   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
691   if (size == 1) {
692     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
693     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
694     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
695     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
696   } else {
697     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
698                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
699     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
700                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
701   }
702   ierr = PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
703   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
704   *newmat = B;
705   PetscFunctionReturn(0);
706 }
707 EXTERN_C_END
708 
709 #undef __FUNCT__
710 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
711 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
712   PetscErrorCode   ierr;
713   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
714 
715   PetscFunctionBegin;
716   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
717   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
718   PetscFunctionReturn(0);
719 }
720 
721 /*MC
722   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
723   via the external package SuperLU_DIST.
724 
725   If SuperLU_DIST is installed (see the manual for
726   instructions on how to declare the existence of external packages),
727   a matrix type can be constructed which invokes SuperLU_DIST solvers.
728   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
729   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
730   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
731 
732   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
733   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
734   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
735   for communicators controlling multiple processes.  It is recommended that you call both of
736   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
737   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
738   without data copy; but this MUST be called AFTER the matrix values are set.
739 
740 
741 
742   Options Database Keys:
743 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
744 . -mat_superlu_dist_r <n> - number of rows in processor partition
745 . -mat_superlu_dist_c <n> - number of columns in processor partition
746 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
747 . -mat_superlu_dist_equil - equilibrate the matrix
748 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
749 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
750 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
751 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
752 . -mat_superlu_dist_iterrefine - use iterative refinement
753 - -mat_superlu_dist_statprint - print factorization information
754 
755    Level: beginner
756 
757 .seealso: PCLU
758 M*/
759 
760 EXTERN_C_BEGIN
761 #undef __FUNCT__
762 #define __FUNCT__ "MatCreate_SuperLU_DIST"
763 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
764 {
765   PetscErrorCode ierr;
766   PetscMPIInt    size;
767 
768   PetscFunctionBegin;
769   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
770   ierr = MatSetType(A,MATAIJ);CHKERRQ(ierr);
771   ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
772   PetscFunctionReturn(0);
773 }
774 EXTERN_C_END
775 
776