xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 8cc2d5df8a4e7cb3098516166e0a894700abf1e9)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
/* How the PETSc matrix is handed to SuperLU_DIST:
   GLOBAL      - the entire matrix is replicated on every process and the
                 p[dz]gssvx_ABglobal() drivers are used;
   DISTRIBUTED - each process passes only its local rows (SLU_NR_loc format)
                 and the p[dz]gssvx() drivers are used. */
typedef enum { GLOBAL,DISTRIBUTED
} SuperLU_MatInputMode;
23 
/* Private context for the SuperLU_DIST interface; hangs off Mat->spptr. */
typedef struct {
  int_t                   nprow,npcol,*row,*col;  /* process-grid shape; row/col index arrays of A_sup */
  gridinfo_t              grid;                   /* SuperLU_DIST process grid */
  superlu_options_t       options;                /* solver options; options.Fact also tracks factorization state */
  SuperMatrix             A_sup;                  /* the matrix in SuperLU_DIST format (SLU_NC or SLU_NR_loc) */
  ScalePermstruct_t       ScalePermstruct;        /* row/column scalings and permutations */
  LUstruct_t              LUstruct;               /* the distributed L and U factors */
  int                     StatPrint;              /* NOTE(review): appears unused in this file; options.PrintStat is consulted instead -- confirm before removing */
  int                     MatInputMode;           /* GLOBAL or DISTRIBUTED (SuperLU_MatInputMode) */
  SOLVEstruct_t           SOLVEstruct;            /* communication structures for the distributed solve */
  fact_t                  FactPattern;            /* SamePattern or SamePattern_SameRowPerm for refactorizations */
  MPI_Comm                comm_superlu;           /* private dup of the matrix communicator, used for the grid */
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;                   /* nonzero values of A_sup */
#else
  double                  *val;                   /* nonzero values of A_sup */
#endif

  /* A few function pointers for inheritance: the AIJ implementations saved
     at conversion time so they can be called through and restored later. */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
55 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
56 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
57 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
58 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
59 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
60 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
61 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *);
62 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
/*
   Converts a MATSUPERLU_DIST matrix back to SeqAIJ/MPIAIJ by restoring the
   AIJ function pointers saved in the Mat_SuperLU_DIST context and removing
   the composed conversion routines.  With MAT_INITIAL_MATRIX a duplicate of
   A is converted; with MAT_REUSE_MATRIX the conversion happens in place on
   *newmat (callers typically pass newmat == &A).

   NOTE(review): the Mat_SuperLU_DIST context in A->spptr is not freed here;
   presumably ownership remains with A and is released during destroy --
   verify against MatDestroy_SuperLU_DIST.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode   ierr;
  Mat              B=*newmat;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* Reset the original function pointers */
  B->ops->duplicate        = lu->MatDuplicate;
  B->ops->view             = lu->MatView;
  B->ops->assemblyend      = lu->MatAssemblyEnd;
  B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
  B->ops->destroy          = lu->MatDestroy;

  /* Remove the conversion functions composed by MatConvert_AIJ_SuperLU_DIST()
     (empty name string detaches them). */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END
93 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SuperLU_DIST"
/*
   Destroys a MATSUPERLU_DIST matrix: releases all SuperLU_DIST objects
   (matrix storage, L/U factors, scalings, process grid, duplicated
   communicator) when this instance owns them, then converts the matrix
   back to its underlying AIJ type and invokes the AIJ destroy routine.
*/
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      /* replicated input used compressed-column (SLU_NC) storage */
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      /* distributed input used local compressed-row (SLU_NR_loc) storage */
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      if ( lu->options.SolveInitialized ) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }

  /* Convert back to the underlying AIJ type (restores the AIJ destroy op),
     then let that destroy routine finish the job. */
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatSolve_SuperLU_DIST"
139 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
140 {
141   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
142   PetscErrorCode   ierr;
143   PetscMPIInt      size;
144   PetscInt         m=A->rmap.N, N=A->cmap.N;
145   SuperLUStat_t    stat;
146   double           berr[1];
147   PetscScalar      *bptr;
148   PetscInt         info, nrhs=1;
149   Vec              x_seq;
150   IS               iden;
151   VecScatter       scat;
152 
153   PetscFunctionBegin;
154   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
155   if (size > 1) {
156     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
157       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
158       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
159       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
160       ierr = ISDestroy(iden);CHKERRQ(ierr);
161 
162       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
163       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
164       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
165     } else { /* distributed mat input */
166       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
167       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
168     }
169   } else { /* size == 1 */
170     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
171     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
172   }
173 
174   if (lu->options.Fact != FACTORED)
175     SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
176 
177   PStatInit(&stat);        /* Initialize the statistics variables. */
178   if (lu->MatInputMode == GLOBAL) {
179 #if defined(PETSC_USE_COMPLEX)
180     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
181                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
182 #else
183     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
184                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
185 #endif
186   } else { /* distributed mat input */
187 #if defined(PETSC_USE_COMPLEX)
188     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
189 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
190     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
191 #else
192     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
193 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
194     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
195 #endif
196   }
197   if (lu->options.PrintStat) {
198      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
199   }
200   PStatFree(&stat);
201 
202   if (size > 1) {
203     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
204       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
205       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
206       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
207       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
208       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
209     } else {
210       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
211     }
212   } else {
213     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
220 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
221 {
222   Mat              *tseq,A_seq = PETSC_NULL;
223   Mat_SeqAIJ       *aa,*bb;
224   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225   PetscErrorCode   ierr;
226   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
227                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
228   PetscMPIInt      size,rank;
229   SuperLUStat_t    stat;
230   double           *berr=0;
231   IS               isrow;
232   PetscLogDouble   time0,time,time_min,time_max;
233   Mat              F_diag=PETSC_NULL;
234 #if defined(PETSC_USE_COMPLEX)
235   doublecomplex    *av, *bv;
236 #else
237   double           *av, *bv;
238 #endif
239 
240   PetscFunctionBegin;
241   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
242   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
243 
244   if (lu->options.PrintStat) { /* collect time for mat conversion */
245     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
246     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
247   }
248 
249   if (lu->MatInputMode == GLOBAL) { /* global mat input */
250     if (size > 1) { /* convert mpi A to seq mat A */
251       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
252       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
253       ierr = ISDestroy(isrow);CHKERRQ(ierr);
254 
255       A_seq = *tseq;
256       ierr = PetscFree(tseq);CHKERRQ(ierr);
257       aa =  (Mat_SeqAIJ*)A_seq->data;
258     } else {
259       aa =  (Mat_SeqAIJ*)A->data;
260     }
261 
262     /* Convert Petsc NR matrix to SuperLU_DIST NC.
263        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
264     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
265       if (lu->FactPattern == SamePattern_SameRowPerm){
266         Destroy_CompCol_Matrix_dist(&lu->A_sup);
267         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
268         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
269       } else {
270         Destroy_CompCol_Matrix_dist(&lu->A_sup);
271         Destroy_LU(N, &lu->grid, &lu->LUstruct);
272         lu->options.Fact = SamePattern;
273       }
274     }
275 #if defined(PETSC_USE_COMPLEX)
276     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
277 #else
278     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
279 #endif
280 
281     /* Create compressed column matrix A_sup. */
282 #if defined(PETSC_USE_COMPLEX)
283     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
284 #else
285     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
286 #endif
287   } else { /* distributed mat input */
288     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
289     aa=(Mat_SeqAIJ*)(mat->A)->data;
290     bb=(Mat_SeqAIJ*)(mat->B)->data;
291     ai=aa->i; aj=aa->j;
292     bi=bb->i; bj=bb->j;
293 #if defined(PETSC_USE_COMPLEX)
294     av=(doublecomplex*)aa->a;
295     bv=(doublecomplex*)bb->a;
296 #else
297     av=aa->a;
298     bv=bb->a;
299 #endif
300     rstart = A->rmap.rstart;
301     nz     = aa->nz + bb->nz;
302     garray = mat->garray;
303 
304     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
305 #if defined(PETSC_USE_COMPLEX)
306       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
307 #else
308       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309 #endif
310     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
311       if (lu->FactPattern == SamePattern_SameRowPerm){
312         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
313         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
314       } else {
315         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
316         lu->options.Fact = SamePattern;
317       }
318     }
319     nz = 0; irow = rstart;
320     for ( i=0; i<m; i++ ) {
321       lu->row[i] = nz;
322       countA = ai[i+1] - ai[i];
323       countB = bi[i+1] - bi[i];
324       ajj = aj + ai[i];  /* ptr to the beginning of this row */
325       bjj = bj + bi[i];
326 
327       /* B part, smaller col index */
328       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
329       jB = 0;
330       for (j=0; j<countB; j++){
331         jcol = garray[bjj[j]];
332         if (jcol > colA_start) {
333           jB = j;
334           break;
335         }
336         lu->col[nz] = jcol;
337         lu->val[nz++] = *bv++;
338         if (j==countB-1) jB = countB;
339       }
340 
341       /* A part */
342       for (j=0; j<countA; j++){
343         lu->col[nz] = rstart + ajj[j];
344         lu->val[nz++] = *av++;
345       }
346 
347       /* B part, larger col index */
348       for (j=jB; j<countB; j++){
349         lu->col[nz] = garray[bjj[j]];
350         lu->val[nz++] = *bv++;
351       }
352     }
353     lu->row[m] = nz;
354 #if defined(PETSC_USE_COMPLEX)
355     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
356 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
357 #else
358     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
359 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
360 #endif
361   }
362   if (lu->options.PrintStat) {
363     ierr = PetscGetTime(&time);CHKERRQ(ierr);
364     time0 = time - time0;
365   }
366 
367   /* Factor the matrix. */
368   PStatInit(&stat);   /* Initialize the statistics variables. */
369 
370   if (lu->MatInputMode == GLOBAL) { /* global mat input */
371 #if defined(PETSC_USE_COMPLEX)
372     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
373                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
374 #else
375     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
376                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
377 #endif
378   } else { /* distributed mat input */
379 #if defined(PETSC_USE_COMPLEX)
380     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
381 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
382     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
383 #else
384     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
385 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
386     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
387 #endif
388   }
389 
390   if (lu->MatInputMode == GLOBAL && size > 1){
391     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
392   }
393 
394   if (lu->options.PrintStat) {
395     if (size > 1){
396       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
397       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
398       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
399       time = time/size; /* average time */
400       if (!rank)
401         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
402                               %g / %g / %g\n",time_max,time_min,time);
403     } else {
404       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
405                               %g\n",time0);
406     }
407 
408     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
409   }
410   PStatFree(&stat);
411   if (size > 1){
412     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
413     F_diag->assembled = PETSC_TRUE;
414   }
415   (*F)->assembled  = PETSC_TRUE;
416   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
417   PetscFunctionReturn(0);
418 }
419 
420 /* Note the Petsc r and c permutations are ignored */
421 #undef __FUNCT__
422 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
423 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
424 {
425   Mat               B;
426   Mat_SuperLU_DIST  *lu;
427   PetscErrorCode    ierr;
428   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
429   PetscMPIInt       size;
430   superlu_options_t options;
431   PetscTruth        flg;
432   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
433   const char        *prtype[] = {"LargeDiag","NATURAL"};
434   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
435 
436   PetscFunctionBegin;
437   /* Create the factorization matrix */
438   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
439   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
440   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
441   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
442   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
443 
444   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
445   B->ops->solve            = MatSolve_SuperLU_DIST;
446   B->factor                = FACTOR_LU;
447 
448   lu = (Mat_SuperLU_DIST*)(B->spptr);
449 
450   /*   Set the default input options:
451         options.Fact = DOFACT;
452         options.Equil = YES;
453         options.ColPerm = MMD_AT_PLUS_A;
454         options.RowPerm = LargeDiag;
455         options.ReplaceTinyPivot = YES;
456         options.Trans = NOTRANS;
457         options.IterRefine = DOUBLE;
458         options.SolveInitialized = NO;
459         options.RefineInitialized = NO;
460         options.PrintStat = YES;
461   */
462   set_default_options_dist(&options);
463 
464   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
465   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
466   /* Default num of process columns and rows */
467   lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size));
468   if (!lu->npcol) lu->npcol = 1;
469   while (lu->npcol > 0) {
470     lu->nprow = (PetscMPIInt)(size/lu->npcol);
471     if (size == lu->nprow * lu->npcol) break;
472     lu->npcol --;
473   }
474 
475   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
476     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
477     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
478     if (size != lu->nprow * lu->npcol)
479       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
480 
481     lu->MatInputMode = DISTRIBUTED;
482     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
483     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
484 
485     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
486     if (!flg) {
487       options.Equil = NO;
488     }
489 
490     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
491     if (flg) {
492       switch (indx) {
493       case 0:
494         options.RowPerm = LargeDiag;
495         break;
496       case 1:
497         options.RowPerm = NOROWPERM;
498         break;
499       }
500     }
501 
502     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr);
503     if (flg) {
504       switch (indx) {
505       case 0:
506         options.ColPerm = MMD_AT_PLUS_A;
507         break;
508       case 1:
509         options.ColPerm = NATURAL;
510         break;
511       case 2:
512         options.ColPerm = MMD_ATA;
513         break;
514       }
515     }
516 
517     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
518     if (!flg) {
519       options.ReplaceTinyPivot = NO;
520     }
521 
522     lu->FactPattern = SamePattern;
523     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
524     if (flg) {
525       switch (indx) {
526       case 0:
527         lu->FactPattern = SamePattern;
528         break;
529       case 1:
530         lu->FactPattern = SamePattern_SameRowPerm;
531         break;
532       }
533     }
534 
535     options.IterRefine = NOREFINE;
536     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
537     if (flg) {
538       options.IterRefine = DOUBLE;
539     }
540 
541     if (PetscLogPrintInfo) {
542       options.PrintStat = YES;
543     } else {
544       options.PrintStat = NO;
545     }
546     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
547                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
548   PetscOptionsEnd();
549 
550   /* Initialize the SuperLU process grid. */
551   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
552 
553   /* Initialize ScalePermstruct and LUstruct. */
554   ScalePermstructInit(M, N, &lu->ScalePermstruct);
555   LUstructInit(M, N, &lu->LUstruct);
556 
557   lu->options             = options;
558   lu->options.Fact        = DOFACT;
559   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
560   *F = B;
561   PetscFunctionReturn(0);
562 }
563 
564 #undef __FUNCT__
565 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
566 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
567   PetscErrorCode   ierr;
568   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
569 
570   PetscFunctionBegin;
571   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
572   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
573   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
574   PetscFunctionReturn(0);
575 }
576 
577 #undef __FUNCT__
578 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
579 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
580 {
581   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
582   superlu_options_t options;
583   PetscErrorCode    ierr;
584 
585   PetscFunctionBegin;
586   /* check if matrix is superlu_dist type */
587   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
588 
589   options = lu->options;
590   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
591   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
592   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
593   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
594   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
595   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
596   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
597   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
598   if (options.ColPerm == NATURAL) {
599     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
600   } else if (options.ColPerm == MMD_AT_PLUS_A) {
601     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
602   } else if (options.ColPerm == MMD_ATA) {
603     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
604   } else {
605     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
606   }
607 
608   if (lu->FactPattern == SamePattern){
609     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
610   } else {
611     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
612   }
613   PetscFunctionReturn(0);
614 }
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatView_SuperLU_DIST"
618 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
619 {
620   PetscErrorCode    ierr;
621   PetscTruth        iascii;
622   PetscViewerFormat format;
623   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
624 
625   PetscFunctionBegin;
626   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
627 
628   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
629   if (iascii) {
630     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
631     if (format == PETSC_VIEWER_ASCII_INFO) {
632       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
633     }
634   }
635   PetscFunctionReturn(0);
636 }
637 
638 
639 EXTERN_C_BEGIN
640 #undef __FUNCT__
641 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
642 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
643 {
644   /* This routine is only called to convert to MATSUPERLU_DIST */
645   /* from MATSEQAIJ if A has a single process communicator */
646   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
647   PetscErrorCode   ierr;
648   PetscMPIInt      size;
649   MPI_Comm         comm;
650   Mat              B=*newmat;
651   Mat_SuperLU_DIST *lu;
652 
653   PetscFunctionBegin;
654   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
655   if (reuse == MAT_INITIAL_MATRIX) {
656     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
657     lu->MatDuplicate         = B->ops->duplicate;
658     lu->MatView              = B->ops->view;
659     lu->MatAssemblyEnd       = B->ops->assemblyend;
660     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
661     lu->MatDestroy           = B->ops->destroy;
662   } else {
663     lu->MatDuplicate         = A->ops->duplicate;
664     lu->MatView              = A->ops->view;
665     lu->MatAssemblyEnd       = A->ops->assemblyend;
666     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
667     lu->MatDestroy           = A->ops->destroy;
668   }
669   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
670 
671   B->spptr                 = (void*)lu;
672   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
673   B->ops->view             = MatView_SuperLU_DIST;
674   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
675   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
676   B->ops->destroy          = MatDestroy_SuperLU_DIST;
677 
678   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
679   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
680   if (size == 1) {
681     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
682     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
683     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
684     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
685   } else {
686     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
687                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
688     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
689                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
690   }
691   ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
692   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
693   *newmat = B;
694   PetscFunctionReturn(0);
695 }
696 EXTERN_C_END
697 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_SuperLU_DIST"
/*
   Duplicates a MATSUPERLU_DIST matrix: runs the saved (AIJ) duplicate and
   then copies this matrix's Mat_SuperLU_DIST context into the new matrix.

   NOTE(review): PetscMemcpy performs a shallow copy, so the duplicate
   shares pointers (grid, LUstruct, comm_superlu, val/row/col) with A;
   it also assumes (*M)->spptr already points at writable storage of
   sizeof(Mat_SuperLU_DIST) -- verify that the inherited duplicate routine
   actually allocates it, otherwise this write is into unowned memory.
*/
PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

  PetscFunctionBegin;
  ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
  /* shallow copy of the solver context (see NOTE above) */
  ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
709 
710 /*MC
711   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
712   via the external package SuperLU_DIST.
713 
714   If SuperLU_DIST is installed (see the manual for
715   instructions on how to declare the existence of external packages),
716   a matrix type can be constructed which invokes SuperLU_DIST solvers.
717   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
718 
719   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
720   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
721   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
722   for communicators controlling multiple processes.  It is recommended that you call both of
723   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
724   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
725   without data copy.
726 
727   Options Database Keys:
728 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
729 . -mat_superlu_dist_r <n> - number of rows in processor partition
730 . -mat_superlu_dist_c <n> - number of columns in processor partition
731 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
732 . -mat_superlu_dist_equil - equilibrate the matrix
733 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
734 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
735 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
736 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
737 . -mat_superlu_dist_iterrefine - use iterative refinement
738 - -mat_superlu_dist_statprint - print factorization information
739 
740    Level: beginner
741 
742 .seealso: PCLU
743 M*/
744 
745 EXTERN_C_BEGIN
746 #undef __FUNCT__
747 #define __FUNCT__ "MatCreate_SuperLU_DIST"
748 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
749 {
750   PetscErrorCode ierr;
751   PetscMPIInt    size;
752 
753   PetscFunctionBegin;
754   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
755   if (size == 1) {
756     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
757   } else {
758     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
759     /*  A_diag = 0x0 ???  -- do we need it?
760     Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
761     ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
762     */
763   }
764   ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
765   PetscFunctionReturn(0);
766 }
767 EXTERN_C_END
768 
769