xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 593c1905dad567ae273cd337623ecaef31b3e817)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
/* How the PETSc matrix is handed to SuperLU_DIST:
   GLOBAL      - the whole matrix is gathered onto every process (SLU_NC format),
   DISTRIBUTED - each process passes only its local rows (SLU_NR_loc format). */
typedef enum { GLOBAL,DISTRIBUTED
} SuperLU_MatInputMode;

/* Per-matrix context attached to A->spptr for the "superlu_dist" type. */
typedef struct {
  int_t                   nprow,npcol,*row,*col;  /* process-grid dims; compressed row/col index arrays owned by SuperLU_DIST */
  gridinfo_t              grid;                   /* SuperLU_DIST process grid (see superlu_gridinit) */
  superlu_options_t       options;                /* solver options; options.Fact doubles as factorization-state flag */
  SuperMatrix             A_sup;                  /* the matrix in SuperLU_DIST format (SLU_NC or SLU_NR_loc) */
  ScalePermstruct_t       ScalePermstruct;        /* row/column scalings and permutations */
  LUstruct_t              LUstruct;               /* distributed L and U factors */
  int                     StatPrint;              /* nonzero: print factorization statistics */
  int                     MatInputMode;           /* GLOBAL or DISTRIBUTED (SuperLU_MatInputMode) */
  SOLVEstruct_t           SOLVEstruct;            /* communication data reused across solves (distributed mode only) */
  fact_t                  FactPattern;            /* SamePattern or SamePattern_SameRowPerm for refactorizations */
  MPI_Comm                comm_superlu;           /* private dup of the matrix communicator, freed in MatDestroy */
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;                   /* numerical values in SuperLU_DIST's complex type */
#else
  double                  *val;
#endif

  /* A few function pointers for inheritance */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
55 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
56 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
57 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
58 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
59 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
60 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
61 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *);
62 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
/*
   Converts a MATSUPERLU_DIST matrix back to plain (Seq/MPI)AIJ by restoring
   the inherited operation pointers saved in the Mat_SuperLU_DIST context and
   removing the composed conversion functions.

   NOTE(review): B->spptr (the Mat_SuperLU_DIST context) is not freed or
   cleared here; presumably the subsequent AIJ destroy path releases it —
   TODO confirm, otherwise this leaks one context per conversion.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode   ierr;
  Mat              B=*newmat;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* Reset the original function pointers */
  B->ops->duplicate        = lu->MatDuplicate;
  B->ops->view             = lu->MatView;
  B->ops->assemblyend      = lu->MatAssemblyEnd;
  B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
  B->ops->destroy          = lu->MatDestroy;

  /* Remove the composed converters; both seq and mpi names are cleared
     unconditionally since composing a NULL function is a no-op removal. */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END
93 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SuperLU_DIST"
/*
   Destroys a MATSUPERLU_DIST matrix: releases all SuperLU_DIST objects
   (matrix storage, LU factors, process grid, private communicator) if this
   context owns them, then converts back to AIJ and dispatches to the
   inherited destroy routine.
*/
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      /* Distributed mode may have set up solve communication data; tear it down. */
      if ( lu->options.SolveInitialized ) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    /* Free the communicator duplicated in MatLUFactorSymbolic_SuperLU_DIST(). */
    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }

  /* Convert back to the parent AIJ type so its destroy routine runs last. */
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatSolve_SuperLU_DIST"
139 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
140 {
141   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
142   PetscErrorCode   ierr;
143   PetscMPIInt      size;
144   PetscInt         m=A->rmap.N, N=A->cmap.N;
145   SuperLUStat_t    stat;
146   double           berr[1];
147   PetscScalar      *bptr;
148   PetscInt         info, nrhs=1;
149   Vec              x_seq;
150   IS               iden;
151   VecScatter       scat;
152 
153   PetscFunctionBegin;
154   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
155   if (size > 1) {
156     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
157       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
158       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
159       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
160       ierr = ISDestroy(iden);CHKERRQ(ierr);
161 
162       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
163       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
164       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
165     } else { /* distributed mat input */
166       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
167       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
168     }
169   } else { /* size == 1 */
170     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
171     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
172   }
173 
174   if (lu->options.Fact != FACTORED)
175     SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
176 
177   PStatInit(&stat);        /* Initialize the statistics variables. */
178   if (lu->MatInputMode == GLOBAL) {
179 #if defined(PETSC_USE_COMPLEX)
180     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
181                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
182 #else
183     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
184                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
185 #endif
186   } else { /* distributed mat input */
187 #if defined(PETSC_USE_COMPLEX)
188     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
189 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
190     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
191 #else
192     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
193 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
194     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
195 #endif
196   }
197   if (lu->options.PrintStat) {
198      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
199   }
200   PStatFree(&stat);
201 
202   if (size > 1) {
203     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
204       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
205       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
206       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
207       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
208       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
209     } else {
210       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
211     }
212   } else {
213     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
#undef __FUNCT__
#define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
/*
   MatLUFactorNumeric_SuperLU_DIST - Converts the PETSc matrix A into
   SuperLU_DIST format and performs the (possibly repeated) numeric LU
   factorization, storing the factors in the Mat_SuperLU_DIST context of *F.

   GLOBAL input mode gathers A onto every process and builds a compressed
   column (SLU_NC) matrix; DISTRIBUTED mode merges the local diagonal (A)
   and off-diagonal (B) SeqAIJ parts of the MPIAIJ matrix into a local
   compressed row (SLU_NR_loc) matrix with globally increasing column indices.
*/
PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
{
  Mat              *tseq,A_seq = PETSC_NULL;
  Mat_SeqAIJ       *aa,*bb;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
  PetscErrorCode   ierr;
  PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
                   m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
  PetscMPIInt      size,rank;
  SuperLUStat_t    stat;
  double           *berr=0;
  IS               isrow;
  PetscLogDouble   time0,time,time_min,time_max;
  Mat              F_diag=PETSC_NULL;
#if defined(PETSC_USE_COMPLEX)
  doublecomplex    *av, *bv;
#else
  double           *av, *bv;
#endif

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);

  if (lu->options.PrintStat) { /* collect time for mat conversion */
    ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
    ierr = PetscGetTime(&time0);CHKERRQ(ierr);
  }

  if (lu->MatInputMode == GLOBAL) { /* global mat input */
    if (size > 1) { /* convert mpi A to seq mat A */
      /* Gather the full matrix onto this process via an all-rows submatrix. */
      ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
      ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
      ierr = ISDestroy(isrow);CHKERRQ(ierr);

      A_seq = *tseq;
      ierr = PetscFree(tseq);CHKERRQ(ierr);
      aa =  (Mat_SeqAIJ*)A_seq->data;
    } else {
      aa =  (Mat_SeqAIJ*)A->data;
    }

    /* Convert Petsc NR matrix to SuperLU_DIST NC.
       Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
    if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_CompCol_Matrix_dist(&lu->A_sup);
        Destroy_LU(N, &lu->grid, &lu->LUstruct);
        lu->options.Fact = SamePattern;
      }
    }
#if defined(PETSC_USE_COMPLEX)
    zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
#else
    dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
#endif

    /* Create compressed column matrix A_sup. */
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
#else
    dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
#endif
  } else { /* distributed mat input */
    Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
    aa=(Mat_SeqAIJ*)(mat->A)->data;  /* local diagonal block */
    bb=(Mat_SeqAIJ*)(mat->B)->data;  /* local off-diagonal block */
    ai=aa->i; aj=aa->j;
    bi=bb->i; bj=bb->j;
#if defined(PETSC_USE_COMPLEX)
    av=(doublecomplex*)aa->a;
    bv=(doublecomplex*)bb->a;
#else
    av=aa->a;
    bv=bb->a;
#endif
    rstart = A->rmap.rstart;   /* first global row owned by this process */
    nz     = aa->nz + bb->nz;  /* total local nonzeros */
    garray = mat->garray;      /* maps B's compact column indices to global columns */

    if (lu->options.Fact == DOFACT) {/* first numeric factorization */
#if defined(PETSC_USE_COMPLEX)
      zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#else
      dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
#endif
    } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
      if (lu->FactPattern == SamePattern_SameRowPerm){
        /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
        lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
      } else {
        Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
        lu->options.Fact = SamePattern;
      }
    }
    /* Merge the A (diagonal) and B (off-diagonal) parts of each local row so
       that column indices appear in increasing global order:
       B entries left of the diagonal block, then A entries, then B entries
       right of the diagonal block. */
    nz = 0; irow = rstart;
    for ( i=0; i<m; i++ ) {
      lu->row[i] = nz;
      countA = ai[i+1] - ai[i];
      countB = bi[i+1] - bi[i];
      ajj = aj + ai[i];  /* ptr to the beginning of this row */
      bjj = bj + bi[i];

      /* B part, smaller col index */
      colA_start = rstart + ajj[0]; /* the smallest global col index of A */
      jB = 0;
      for (j=0; j<countB; j++){
        jcol = garray[bjj[j]];
        if (jcol > colA_start) {
          jB = j;
          break;
        }
        lu->col[nz] = jcol;
        lu->val[nz++] = *bv++;
        if (j==countB-1) jB = countB;
      }

      /* A part */
      for (j=0; j<countA; j++){
        lu->col[nz] = rstart + ajj[j];
        lu->val[nz++] = *av++;
      }

      /* B part, larger col index */
      for (j=jB; j<countB; j++){
        lu->col[nz] = garray[bjj[j]];
        lu->val[nz++] = *bv++;
      }
    }
    lu->row[m] = nz;
#if defined(PETSC_USE_COMPLEX)
    zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
#else
    dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
                                   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
#endif
  }
  if (lu->options.PrintStat) {
    ierr = PetscGetTime(&time);CHKERRQ(ierr);
    time0 = time - time0;  /* elapsed conversion time on this process */
  }

  /* Factor the matrix. */
  PStatInit(&stat);   /* Initialize the statistics variables. */

  /* Passing a NULL rhs (0, with nrhs = 0) performs factorization only. */
  if (lu->MatInputMode == GLOBAL) { /* global mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                   &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#else
    pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
                   &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
#endif
  } else { /* distributed mat input */
#if defined(PETSC_USE_COMPLEX)
    pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
#else
    pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
            &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
    if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
#endif
  }

  if (lu->MatInputMode == GLOBAL && size > 1){
    ierr = MatDestroy(A_seq);CHKERRQ(ierr);
  }

  if (lu->options.PrintStat) {
    if (size > 1){
      /* NOTE(review): these MPI_Reduce return codes are stored in ierr but
         never checked with CHKERRQ — left as-is to avoid behavior change. */
      ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
      ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
      ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
      time = time/size; /* average time */
      if (!rank)
        ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
                              %g / %g / %g\n",time_max,time_min,time);
    } else {
      ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
                              %g\n",time0);
    }

    PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
  }
  PStatFree(&stat);
  if (size > 1){
    /* Mark the local diagonal block assembled so later PETSc checks pass. */
    F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
    F_diag->assembled = PETSC_TRUE;
  }
  (*F)->assembled  = PETSC_TRUE;
  lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
  PetscFunctionReturn(0);
}
419 
420 /* Note the Petsc r and c permutations are ignored */
421 #undef __FUNCT__
422 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
423 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
424 {
425   Mat               B;
426   Mat_SuperLU_DIST  *lu;
427   PetscErrorCode    ierr;
428   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
429   PetscMPIInt       size;
430   superlu_options_t options;
431   PetscTruth        flg;
432   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
433   const char        *prtype[] = {"LargeDiag","NATURAL"};
434   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
435 
436   PetscFunctionBegin;
437   /* Create the factorization matrix */
438   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
439   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
440   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
441   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
442   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
443 
444   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
445   B->ops->solve            = MatSolve_SuperLU_DIST;
446   B->factor                = FACTOR_LU;
447 
448   lu = (Mat_SuperLU_DIST*)(B->spptr);
449 
450   /*   Set the default input options:
451         options.Fact = DOFACT;
452         options.Equil = YES;
453         options.ColPerm = MMD_AT_PLUS_A;
454         options.RowPerm = LargeDiag;
455         options.ReplaceTinyPivot = YES;
456         options.Trans = NOTRANS;
457         options.IterRefine = DOUBLE;
458         options.SolveInitialized = NO;
459         options.RefineInitialized = NO;
460         options.PrintStat = YES;
461   */
462   set_default_options_dist(&options);
463 
464   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
465   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
466 
467   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
468     lu->npcol = (PetscMPIInt)(sqrt((PetscReal)size)); /* Default num of process columns */
469     if (!lu->npcol) lu->npcol = 1;
470     lu->nprow = (PetscMPIInt)(size/lu->npcol);        /* Default num of process rows */
471     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
472     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
473     if (size != lu->nprow * lu->npcol)
474       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
475 
476     lu->MatInputMode = DISTRIBUTED;
477     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
478     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
479 
480     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
481     if (!flg) {
482       options.Equil = NO;
483     }
484 
485     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
486     if (flg) {
487       switch (indx) {
488       case 0:
489         options.RowPerm = LargeDiag;
490         break;
491       case 1:
492         options.RowPerm = NOROWPERM;
493         break;
494       }
495     }
496 
497     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr);
498     if (flg) {
499       switch (indx) {
500       case 0:
501         options.ColPerm = MMD_AT_PLUS_A;
502         break;
503       case 1:
504         options.ColPerm = NATURAL;
505         break;
506       case 2:
507         options.ColPerm = MMD_ATA;
508         break;
509       }
510     }
511 
512     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
513     if (!flg) {
514       options.ReplaceTinyPivot = NO;
515     }
516 
517     lu->FactPattern = SamePattern;
518     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
519     if (flg) {
520       switch (indx) {
521       case 0:
522         lu->FactPattern = SamePattern;
523         break;
524       case 1:
525         lu->FactPattern = SamePattern_SameRowPerm;
526         break;
527       }
528     }
529 
530     options.IterRefine = NOREFINE;
531     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
532     if (flg) {
533       options.IterRefine = DOUBLE;
534     }
535 
536     if (PetscLogPrintInfo) {
537       options.PrintStat = YES;
538     } else {
539       options.PrintStat = NO;
540     }
541     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
542                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
543   PetscOptionsEnd();
544 
545   /* Initialize the SuperLU process grid. */
546   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
547 
548   /* Initialize ScalePermstruct and LUstruct. */
549   ScalePermstructInit(M, N, &lu->ScalePermstruct);
550   LUstructInit(M, N, &lu->LUstruct);
551 
552   lu->options             = options;
553   lu->options.Fact        = DOFACT;
554   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
555   *F = B;
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
561 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
562   PetscErrorCode   ierr;
563   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
564 
565   PetscFunctionBegin;
566   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
567   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
568   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
574 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
575 {
576   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
577   superlu_options_t options;
578   PetscErrorCode    ierr;
579 
580   PetscFunctionBegin;
581   /* check if matrix is superlu_dist type */
582   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
583 
584   options = lu->options;
585   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
586   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
587   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
588   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
589   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
590   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
591   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
592   if (options.ColPerm == NATURAL) {
593     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
594   } else if (options.ColPerm == MMD_AT_PLUS_A) {
595     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
596   } else if (options.ColPerm == MMD_ATA) {
597     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
598   } else {
599     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
600   }
601 
602   if (lu->FactPattern == SamePattern){
603     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
604   } else {
605     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
606   }
607   PetscFunctionReturn(0);
608 }
609 
610 #undef __FUNCT__
611 #define __FUNCT__ "MatView_SuperLU_DIST"
612 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
613 {
614   PetscErrorCode    ierr;
615   PetscTruth        iascii;
616   PetscViewerFormat format;
617   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
618 
619   PetscFunctionBegin;
620   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
621 
622   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
623   if (iascii) {
624     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
625     if (format == PETSC_VIEWER_ASCII_INFO) {
626       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
627     }
628   }
629   PetscFunctionReturn(0);
630 }
631 
632 
633 EXTERN_C_BEGIN
634 #undef __FUNCT__
635 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
636 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
637 {
638   /* This routine is only called to convert to MATSUPERLU_DIST */
639   /* from MATSEQAIJ if A has a single process communicator */
640   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
641   PetscErrorCode   ierr;
642   PetscMPIInt      size;
643   MPI_Comm         comm;
644   Mat              B=*newmat;
645   Mat_SuperLU_DIST *lu;
646 
647   PetscFunctionBegin;
648   if (reuse == MAT_INITIAL_MATRIX) {
649     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
650   }
651 
652   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
653   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
654 
655   lu->MatDuplicate         = A->ops->duplicate;
656   lu->MatView              = A->ops->view;
657   lu->MatAssemblyEnd       = A->ops->assemblyend;
658   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
659   lu->MatDestroy           = A->ops->destroy;
660   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
661 
662   B->spptr                 = (void*)lu;
663   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
664   B->ops->view             = MatView_SuperLU_DIST;
665   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
666   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
667   B->ops->destroy          = MatDestroy_SuperLU_DIST;
668   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
669   if (size == 1) {
670     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
671     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
672     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
673     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
674   } else {
675     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
676                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
677     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
678                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
679   }
680   ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
681   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
682   *newmat = B;
683   PetscFunctionReturn(0);
684 }
685 EXTERN_C_END
686 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_SuperLU_DIST"
/*
   MatDuplicate_SuperLU_DIST - Duplicates via the inherited duplicate routine
   and then shallow-copies the Mat_SuperLU_DIST context into the new matrix.

   NOTE(review): this assumes (*M)->spptr already points at a valid
   Mat_SuperLU_DIST allocation after the inherited duplicate — TODO confirm;
   the copy is shallow, so the duplicate shares grid/LU pointers with A.
*/
PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

  PetscFunctionBegin;
  ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
  ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
698 
699 /*MC
700   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
701   via the external package SuperLU_DIST.
702 
703   If SuperLU_DIST is installed (see the manual for
704   instructions on how to declare the existence of external packages),
705   a matrix type can be constructed which invokes SuperLU_DIST solvers.
706   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
707 
708   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
709   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
710   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
711   for communicators controlling multiple processes.  It is recommended that you call both of
the above preallocation routines for simplicity.  One can also call MatConvert for an in-place
713   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
714   without data copy.
715 
716   Options Database Keys:
717 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
718 . -mat_superlu_dist_r <n> - number of rows in processor partition
719 . -mat_superlu_dist_c <n> - number of columns in processor partition
720 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
721 . -mat_superlu_dist_equil - equilibrate the matrix
722 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
723 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
724 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
725 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
726 . -mat_superlu_dist_iterrefine - use iterative refinement
727 - -mat_superlu_dist_statprint - print factorization information
728 
729    Level: beginner
730 
731 .seealso: PCLU
732 M*/
733 
734 EXTERN_C_BEGIN
735 #undef __FUNCT__
736 #define __FUNCT__ "MatCreate_SuperLU_DIST"
737 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
738 {
739   PetscErrorCode ierr;
740   PetscMPIInt    size;
741 
742   PetscFunctionBegin;
743   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
744   if (size == 1) {
745     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
746   } else {
747     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
748     /*  A_diag = 0x0 ???  -- do we need it?
749     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
750     ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
751     */
752   }
753   ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
754   PetscFunctionReturn(0);
755 }
756 EXTERN_C_END
757 
758