xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 7d0a6c19129e7069c8a40e210b34ed62989173db)
1 
2 /*
3         Provides an interface to the SuperLU_DIST_2.2 sparse solver
4 */
5 
6 #include "../src/mat/impls/aij/seq/aij.h"
7 #include "../src/mat/impls/aij/mpi/mpiaij.h"
8 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
9 #include "stdlib.h"
10 #endif
11 
12 #if defined(PETSC_USE_64BIT_INDICES)
13 /* ugly SuperLU_Dist variable telling it to use long long int */
14 #define _LONGINT
15 #endif
16 
17 EXTERN_C_BEGIN
18 #if defined(PETSC_USE_COMPLEX)
19 #include "superlu_zdefs.h"
20 #else
21 #include "superlu_ddefs.h"
22 #endif
23 EXTERN_C_END
24 
25 typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;
26 const char *SuperLU_MatInputModes[] = {"GLOBAL","DISTRIBUTED","SuperLU_MatInputMode","PETSC_",0};
27 
28 typedef struct {
29   int_t                   nprow,npcol,*row,*col;
30   gridinfo_t              grid;
31   superlu_options_t       options;
32   SuperMatrix             A_sup;
33   ScalePermstruct_t       ScalePermstruct;
34   LUstruct_t              LUstruct;
35   int                     StatPrint;
36   SuperLU_MatInputMode    MatInputMode;
37   SOLVEstruct_t           SOLVEstruct;
38   fact_t                  FactPattern;
39   MPI_Comm                comm_superlu;
40 #if defined(PETSC_USE_COMPLEX)
41   doublecomplex           *val;
42 #else
43   double                  *val;
44 #endif
45   PetscBool               matsolve_iscalled,matmatsolve_iscalled;
46   PetscBool  CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
47 } Mat_SuperLU_DIST;
48 
49 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
50 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,Mat,const MatFactorInfo *);
51 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
52 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
53 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
54 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,Mat,IS,IS,const MatFactorInfo *);
55 extern PetscErrorCode MatDestroy_MPIAIJ(Mat);
56 
57 #undef __FUNCT__
58 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
59 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
60 {
61   PetscErrorCode   ierr;
62   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
63   PetscBool        flg;
64 
65   PetscFunctionBegin;
66   if (lu->CleanUpSuperLU_Dist) {
67     /* Deallocate SuperLU_DIST storage */
68     if (lu->MatInputMode == GLOBAL) {
69       Destroy_CompCol_Matrix_dist(&lu->A_sup);
70     } else {
71       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
72       if ( lu->options.SolveInitialized ) {
73 #if defined(PETSC_USE_COMPLEX)
74         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
75 #else
76         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
77 #endif
78       }
79     }
80     Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct);
81     ScalePermstructFree(&lu->ScalePermstruct);
82     LUstructFree(&lu->LUstruct);
83 
84     /* Release the SuperLU_DIST process grid. */
85     superlu_gridexit(&lu->grid);
86 
87     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
88   }
89 
90   ierr = PetscTypeCompare((PetscObject)A,MATSEQAIJ,&flg);CHKERRQ(ierr);
91   if (flg) {
92     ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
93   } else {
94     ierr = MatDestroy_MPIAIJ(A);CHKERRQ(ierr);
95   }
96   PetscFunctionReturn(0);
97 }
98 
99 #undef __FUNCT__
100 #define __FUNCT__ "MatSolve_SuperLU_DIST"
101 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
102 {
103   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
104   PetscErrorCode   ierr;
105   PetscMPIInt      size;
106   PetscInt         m=A->rmap->n,M=A->rmap->N,N=A->cmap->N;
107   SuperLUStat_t    stat;
108   double           berr[1];
109   PetscScalar      *bptr;
110   PetscInt         nrhs=1;
111   Vec              x_seq;
112   IS               iden;
113   VecScatter       scat;
114   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
115 
116   PetscFunctionBegin;
117   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
118   if (size > 1 && lu->MatInputMode == GLOBAL) {
119     /* global mat input, convert b to x_seq */
120     ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
121     ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
122     ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
123     ierr = ISDestroy(iden);CHKERRQ(ierr);
124 
125     ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
126     ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
127     ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
128   } else { /* size==1 || distributed mat input */
129     if (lu->options.SolveInitialized && !lu->matsolve_iscalled){
130       /* see comments in MatMatSolve() */
131 #if defined(PETSC_USE_COMPLEX)
132       zSolveFinalize(&lu->options, &lu->SOLVEstruct);
133 #else
134       dSolveFinalize(&lu->options, &lu->SOLVEstruct);
135 #endif
136       lu->options.SolveInitialized = NO;
137     }
138     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
139     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
140   }
141 
142   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
143 
144   PStatInit(&stat);        /* Initialize the statistics variables. */
145   if (lu->MatInputMode == GLOBAL) {
146 #if defined(PETSC_USE_COMPLEX)
147     pzgssvx_ABglobal(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,M,nrhs,
148                    &lu->grid,&lu->LUstruct,berr,&stat,&info);
149 #else
150     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr,M,nrhs,
151                    &lu->grid,&lu->LUstruct,berr,&stat,&info);
152 #endif
153   } else { /* distributed mat input */
154 #if defined(PETSC_USE_COMPLEX)
155     pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
156 	    &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
157 #else
158     pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
159 	    &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
160 #endif
161   }
162   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
163 
164   if (lu->options.PrintStat) {
165      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
166   }
167   PStatFree(&stat);
168 
169   if (size > 1 && lu->MatInputMode == GLOBAL) {
170     /* convert seq x to mpi x */
171     ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
172     ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
173     ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
174     ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
175     ierr = VecDestroy(x_seq);CHKERRQ(ierr);
176   } else {
177     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
178     lu->matsolve_iscalled    = PETSC_TRUE;
179     lu->matmatsolve_iscalled = PETSC_FALSE;
180   }
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatMatSolve_SuperLU_DIST"
186 PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X)
187 {
188   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
189   PetscErrorCode   ierr;
190   PetscMPIInt      size;
191   PetscInt         M=A->rmap->N,m=A->rmap->n,nrhs;
192   SuperLUStat_t    stat;
193   double           berr[1];
194   PetscScalar      *bptr;
195   int              info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
196 
197   PetscFunctionBegin;
198   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
199 
200   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
201   if (size > 1 && lu->MatInputMode == GLOBAL) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatInputMode=GLOBAL for nproc %d>1 is not supported",size);
202   /* size==1 or distributed mat input */
203   if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled){
204     /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
205        thus destroy it and create a new SOLVEstruct.
206        Otherwise it may result in memory corruption or incorrect solution
207        See src/mat/examples/tests/ex125.c */
208 #if defined(PETSC_USE_COMPLEX)
209     zSolveFinalize(&lu->options, &lu->SOLVEstruct);
210 #else
211     dSolveFinalize(&lu->options, &lu->SOLVEstruct);
212 #endif
213     lu->options.SolveInitialized  = NO;
214   }
215   ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
216 
217   ierr = MatGetSize(B_mpi,PETSC_NULL,&nrhs);CHKERRQ(ierr);
218 
219   PStatInit(&stat);        /* Initialize the statistics variables. */
220   ierr = MatGetArray(X,&bptr);CHKERRQ(ierr);
221   if (lu->MatInputMode == GLOBAL) { /* size == 1 */
222 #if defined(PETSC_USE_COMPLEX)
223     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, M, nrhs,
224                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
225 #else
226     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, M, nrhs,
227                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
228 #endif
229   } else { /* distributed mat input */
230 #if defined(PETSC_USE_COMPLEX)
231     pzgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,(doublecomplex*)bptr,m,nrhs,&lu->grid,
232 	    &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
233 #else
234     pdgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,bptr,m,nrhs,&lu->grid,
235 	    &lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info);
236 #endif
237   }
238   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
239   ierr = MatRestoreArray(X,&bptr);CHKERRQ(ierr);
240 
241   if (lu->options.PrintStat) {
242      PStatPrint(&lu->options, &stat, &lu->grid); /* Print the statistics. */
243   }
244   PStatFree(&stat);
245   lu->matsolve_iscalled    = PETSC_FALSE;
246   lu->matmatsolve_iscalled = PETSC_TRUE;
247   PetscFunctionReturn(0);
248 }
249 
250 
251 #undef __FUNCT__
252 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
253 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info)
254 {
255   Mat              *tseq,A_seq = PETSC_NULL;
256   Mat_SeqAIJ       *aa,*bb;
257   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(F)->spptr;
258   PetscErrorCode   ierr;
259   PetscInt         M=A->rmap->N,N=A->cmap->N,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
260                    m=A->rmap->n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
261   int              sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
262   PetscMPIInt      size,rank;
263   SuperLUStat_t    stat;
264   double           *berr=0;
265   IS               isrow;
266   PetscLogDouble   time0,time,time_min,time_max;
267   Mat              F_diag=PETSC_NULL;
268 #if defined(PETSC_USE_COMPLEX)
269   doublecomplex    *av, *bv;
270 #else
271   double           *av, *bv;
272 #endif
273 
274   PetscFunctionBegin;
275   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
276   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
277 
278   if (lu->options.PrintStat) { /* collect time for mat conversion */
279     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
280     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
281   }
282 
283   if (lu->MatInputMode == GLOBAL) { /* global mat input */
284     if (size > 1) { /* convert mpi A to seq mat A */
285       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
286       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
287       ierr = ISDestroy(isrow);CHKERRQ(ierr);
288 
289       A_seq = *tseq;
290       ierr = PetscFree(tseq);CHKERRQ(ierr);
291       aa =  (Mat_SeqAIJ*)A_seq->data;
292     } else {
293       PetscBool  flg;
294       ierr = PetscTypeCompare((PetscObject)A,MATMPIAIJ,&flg);CHKERRQ(ierr);
295       if (flg) {
296         Mat_MPIAIJ *At = (Mat_MPIAIJ*)A->data;
297         A = At->A;
298       }
299       aa =  (Mat_SeqAIJ*)A->data;
300     }
301 
302     /* Convert Petsc NR matrix to SuperLU_DIST NC.
303        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
304     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
305       if (lu->FactPattern == SamePattern_SameRowPerm){
306         Destroy_CompCol_Matrix_dist(&lu->A_sup);
307         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
308         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
309       } else {
310         Destroy_CompCol_Matrix_dist(&lu->A_sup);
311         Destroy_LU(N, &lu->grid, &lu->LUstruct);
312         lu->options.Fact = SamePattern;
313       }
314     }
315 #if defined(PETSC_USE_COMPLEX)
316     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
317 #else
318     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
319 #endif
320 
321     /* Create compressed column matrix A_sup. */
322 #if defined(PETSC_USE_COMPLEX)
323     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
324 #else
325     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
326 #endif
327   } else { /* distributed mat input */
328     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
329     aa=(Mat_SeqAIJ*)(mat->A)->data;
330     bb=(Mat_SeqAIJ*)(mat->B)->data;
331     ai=aa->i; aj=aa->j;
332     bi=bb->i; bj=bb->j;
333 #if defined(PETSC_USE_COMPLEX)
334     av=(doublecomplex*)aa->a;
335     bv=(doublecomplex*)bb->a;
336 #else
337     av=aa->a;
338     bv=bb->a;
339 #endif
340     rstart = A->rmap->rstart;
341     nz     = aa->nz + bb->nz;
342     garray = mat->garray;
343 
344     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
345 #if defined(PETSC_USE_COMPLEX)
346       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
347 #else
348       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
349 #endif
350     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
351       if (lu->FactPattern == SamePattern_SameRowPerm){
352         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
353         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
354       } else {
355         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
356         lu->options.Fact = SamePattern;
357       }
358     }
359     nz = 0; irow = rstart;
360     for ( i=0; i<m; i++ ) {
361       lu->row[i] = nz;
362       countA = ai[i+1] - ai[i];
363       countB = bi[i+1] - bi[i];
364       ajj = aj + ai[i];  /* ptr to the beginning of this row */
365       bjj = bj + bi[i];
366 
367       /* B part, smaller col index */
368       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
369       jB = 0;
370       for (j=0; j<countB; j++){
371         jcol = garray[bjj[j]];
372         if (jcol > colA_start) {
373           jB = j;
374           break;
375         }
376         lu->col[nz] = jcol;
377         lu->val[nz++] = *bv++;
378         if (j==countB-1) jB = countB;
379       }
380 
381       /* A part */
382       for (j=0; j<countA; j++){
383         lu->col[nz] = rstart + ajj[j];
384         lu->val[nz++] = *av++;
385       }
386 
387       /* B part, larger col index */
388       for (j=jB; j<countB; j++){
389         lu->col[nz] = garray[bjj[j]];
390         lu->val[nz++] = *bv++;
391       }
392     }
393     lu->row[m] = nz;
394 #if defined(PETSC_USE_COMPLEX)
395     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
396 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
397 #else
398     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
399 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
400 #endif
401   }
402   if (lu->options.PrintStat) {
403     ierr = PetscGetTime(&time);CHKERRQ(ierr);
404     time0 = time - time0;
405   }
406 
407   /* Factor the matrix. */
408   PStatInit(&stat);   /* Initialize the statistics variables. */
409   if (lu->MatInputMode == GLOBAL) { /* global mat input */
410 #if defined(PETSC_USE_COMPLEX)
411     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
412                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
413 #else
414     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
415                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
416 #endif
417   } else { /* distributed mat input */
418 #if defined(PETSC_USE_COMPLEX)
419     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,
420 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
421     if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
422 #else
423     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, m, 0, &lu->grid,
424 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
425     if (sinfo) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
426 #endif
427   }
428 
429   if (lu->MatInputMode == GLOBAL && size > 1){
430     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
431   }
432 
433   if (lu->options.PrintStat) {
434     if (size > 1){
435       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
436       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
437       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
438       time = time/size; /* average time */
439       if (!rank) {
440         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
441       }
442     } else {
443       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
444     }
445     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
446   }
447   PStatFree(&stat);
448   if (size > 1){
449     F_diag = ((Mat_MPIAIJ *)(F)->data)->A;
450     F_diag->assembled = PETSC_TRUE;
451   }
452   (F)->assembled    = PETSC_TRUE;
453   (F)->preallocated = PETSC_TRUE;
454   lu->options.Fact  = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
455   PetscFunctionReturn(0);
456 }
457 
458 /* Note the Petsc r and c permutations are ignored */
459 #undef __FUNCT__
460 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
461 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
462 {
463   Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST*)F->spptr;
464   PetscInt          M=A->rmap->N,N=A->cmap->N;
465 
466   PetscFunctionBegin;
467   /* Initialize the SuperLU process grid. */
468   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
469 
470   /* Initialize ScalePermstruct and LUstruct. */
471   ScalePermstructInit(M, N, &lu->ScalePermstruct);
472   LUstructInit(M, N, &lu->LUstruct);
473   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST;
474   F->ops->solve           = MatSolve_SuperLU_DIST;
475   F->ops->matsolve        = MatMatSolve_SuperLU_DIST;
476   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
477   PetscFunctionReturn(0);
478 }
479 
480 EXTERN_C_BEGIN
481 #undef __FUNCT__
482 #define __FUNCT__ "MatFactorGetSolverPackage_aij_superlu_dist"
483 PetscErrorCode MatFactorGetSolverPackage_aij_superlu_dist(Mat A,const MatSolverPackage *type)
484 {
485   PetscFunctionBegin;
486   *type = MATSOLVERSUPERLU_DIST;
487   PetscFunctionReturn(0);
488 }
489 EXTERN_C_END
490 
491 #undef __FUNCT__
492 #define __FUNCT__ "MatGetFactor_aij_superlu_dist"
493 PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
494 {
495   Mat               B;
496   Mat_SuperLU_DIST  *lu;
497   PetscErrorCode    ierr;
498   PetscInt          M=A->rmap->N,N=A->cmap->N,indx;
499   PetscMPIInt       size;
500   superlu_options_t options;
501   PetscBool         flg;
502   const char        *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"};
503   const char        *rowperm[] = {"LargeDiag","NATURAL"};
504   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
505 
506   PetscFunctionBegin;
507   /* Create the factorization matrix */
508   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
509   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr);
510   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
511   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
512   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
513 
514   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
515   B->ops->view             = MatView_SuperLU_DIST;
516   B->ops->destroy          = MatDestroy_SuperLU_DIST;
517   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_aij_superlu_dist",MatFactorGetSolverPackage_aij_superlu_dist);CHKERRQ(ierr);
518   B->factortype            = MAT_FACTOR_LU;
519 
520   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
521   B->spptr = lu;
522 
523   /* Set the default input options:
524      options.Fact              = DOFACT;
525      options.Equil             = YES;
526      options.ParSymbFact       = NO;
527      options.ColPerm           = METIS_AT_PLUS_A;
528      options.RowPerm           = LargeDiag;
529      options.ReplaceTinyPivot  = YES;
530      options.IterRefine        = DOUBLE;
531      options.Trans             = NOTRANS;
532      options.SolveInitialized  = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
533      options.RefineInitialized = NO;
534      options.PrintStat         = YES;
535   */
536   set_default_options_dist(&options);
537 
538   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(lu->comm_superlu));CHKERRQ(ierr);
539   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
540   /* Default num of process columns and rows */
541   lu->npcol = PetscMPIIntCast((PetscInt)(0.5 + sqrt((PetscReal)size)));
542   if (!lu->npcol) lu->npcol = 1;
543   while (lu->npcol > 0) {
544     lu->nprow = PetscMPIIntCast(size/lu->npcol);
545     if (size == lu->nprow * lu->npcol) break;
546     lu->npcol --;
547   }
548 
549   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
550     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
551     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
552     if (size != lu->nprow * lu->npcol) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
553 
554     lu->MatInputMode = DISTRIBUTED;
555     ierr = PetscOptionsEnum("-mat_superlu_dist_matinput","Matrix input mode (global or distributed)","None",SuperLU_MatInputModes,(PetscEnum)lu->MatInputMode,(PetscEnum*)&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
556     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
557 
558     ierr = PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
559     if (!flg) {
560       options.Equil = NO;
561     }
562 
563     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,2,rowperm[0],&indx,&flg);CHKERRQ(ierr);
564     if (flg) {
565       switch (indx) {
566       case 0:
567         options.RowPerm = LargeDiag;
568         break;
569       case 1:
570         options.RowPerm = NOROWPERM;
571         break;
572       }
573     }
574 
575     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);CHKERRQ(ierr);
576     if (flg) {
577       switch (indx) {
578       case 0:
579         options.ColPerm = NATURAL;
580         break;
581       case 1:
582         options.ColPerm = MMD_AT_PLUS_A;
583         break;
584       case 2:
585         options.ColPerm = MMD_ATA;
586         break;
587       case 3:
588         options.ColPerm = METIS_AT_PLUS_A;
589         break;
590       case 4:
591         options.ColPerm = PARMETIS; /* only works for np>1 */
592         break;
593       default:
594         SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
595       }
596     }
597 
598     ierr = PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
599     if (!flg) {
600       options.ReplaceTinyPivot = NO;
601     }
602 
603     options.ParSymbFact = NO;
604     ierr = PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
605     if (flg){
606 #ifdef PETSC_HAVE_PARMETIS
607       options.ParSymbFact = YES;
608       options.ColPerm     = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */
609 #else
610       printf("parsymbfact needs PARMETIS");
611 #endif
612     }
613 
614     lu->FactPattern = SamePattern;
615     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
616     if (flg) {
617       switch (indx) {
618       case 0:
619         lu->FactPattern = SamePattern;
620         break;
621       case 1:
622         lu->FactPattern = SamePattern_SameRowPerm;
623         break;
624       }
625     }
626 
627     options.IterRefine = NOREFINE;
628     ierr = PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
629     if (flg) {
630       options.IterRefine = DOUBLE;
631     }
632 
633     if (PetscLogPrintInfo) {
634       options.PrintStat = YES;
635     } else {
636       options.PrintStat = NO;
637     }
638     ierr = PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",
639                               (PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,0);CHKERRQ(ierr);
640   PetscOptionsEnd();
641 
642   lu->options              = options;
643   lu->options.Fact         = DOFACT;
644   lu->matsolve_iscalled    = PETSC_FALSE;
645   lu->matmatsolve_iscalled = PETSC_FALSE;
646   *F = B;
647   PetscFunctionReturn(0);
648 }
649 
650 EXTERN_C_BEGIN
651 #undef __FUNCT__
652 #define __FUNCT__ "MatGetFactor_seqaij_superlu_dist"
653 PetscErrorCode MatGetFactor_seqaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
654 {
655   PetscErrorCode ierr;
656 
657   PetscFunctionBegin;
658   ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
659   PetscFunctionReturn(0);
660 }
661 EXTERN_C_END
662 
663 EXTERN_C_BEGIN
664 #undef __FUNCT__
665 #define __FUNCT__ "MatGetFactor_mpiaij_superlu_dist"
666 PetscErrorCode MatGetFactor_mpiaij_superlu_dist(Mat A,MatFactorType ftype,Mat *F)
667 {
668   PetscErrorCode ierr;
669 
670   PetscFunctionBegin;
671   ierr = MatGetFactor_aij_superlu_dist(A,ftype,F);CHKERRQ(ierr);
672   PetscFunctionReturn(0);
673 }
674 EXTERN_C_END
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
678 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
679 {
680   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
681   superlu_options_t options;
682   PetscErrorCode    ierr;
683 
684   PetscFunctionBegin;
685   /* check if matrix is superlu_dist type */
686   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
687 
688   options = lu->options;
689   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
690   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
691   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);CHKERRQ(ierr);
692   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
693   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
694   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscBools[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
695   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
696   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
697 
698   switch(options.ColPerm){
699   case NATURAL:
700     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
701     break;
702   case MMD_AT_PLUS_A:
703     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
704     break;
705   case MMD_ATA:
706     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
707     break;
708   case METIS_AT_PLUS_A:
709     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation METIS_AT_PLUS_A\n");CHKERRQ(ierr);
710     break;
711   case PARMETIS:
712     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation PARMETIS\n");CHKERRQ(ierr);
713     break;
714   default:
715     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation");
716   }
717 
718   ierr = PetscViewerASCIIPrintf(viewer,"  Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);CHKERRQ(ierr);
719 
720   if (lu->FactPattern == SamePattern){
721     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
722   } else {
723     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
724   }
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatView_SuperLU_DIST"
730 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
731 {
732   PetscErrorCode    ierr;
733   PetscBool         iascii;
734   PetscViewerFormat format;
735 
736   PetscFunctionBegin;
737   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
738   if (iascii) {
739     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
740     if (format == PETSC_VIEWER_ASCII_INFO) {
741       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
742     }
743   }
744   PetscFunctionReturn(0);
745 }
746 
747 
748 /*MC
749   MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization
750 
751    Works with AIJ matrices
752 
753   Options Database Keys:
754 + -mat_superlu_dist_r <n> - number of rows in processor partition
755 . -mat_superlu_dist_c <n> - number of columns in processor partition
756 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
757 . -mat_superlu_dist_equil - equilibrate the matrix
758 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
759 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
760 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
761 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
762 . -mat_superlu_dist_iterrefine - use iterative refinement
763 - -mat_superlu_dist_statprint - print factorization information
764 
765    Level: beginner
766 
767 .seealso: PCLU
768 
769 .seealso: PCFactorSetMatSolverPackage(), MatSolverPackage
770 
771 M*/
772 
773