xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 51ec2c115fd3adf68a3f90e2f617835be2dadbbf)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
/* How the PETSc matrix is handed to SuperLU_DIST:
     GLOBAL      - the whole matrix is replicated on every process (SLU_NC format)
     DISTRIBUTED - each process passes only its local rows (SLU_NR_loc format) */
typedef enum {GLOBAL,DISTRIBUTED} SuperLU_MatInputMode;

/* Solver context hung off the PETSc Mat (Mat->spptr) */
typedef struct {
  int_t                   nprow,npcol,*row,*col;  /* process-grid dimensions; index arrays handed to SuperLU_DIST */
  gridinfo_t              grid;                   /* SuperLU_DIST process grid */
  superlu_options_t       options;                /* SuperLU_DIST option set (Fact, ColPerm, IterRefine, ...) */
  SuperMatrix             A_sup;                  /* the matrix in SuperLU_DIST storage */
  ScalePermstruct_t       ScalePermstruct;        /* row/column scalings and permutations */
  LUstruct_t              LUstruct;               /* the distributed L and U factors */
  int                     StatPrint;              /* nonzero: print factorization statistics */
  int                     MatInputMode;           /* GLOBAL or DISTRIBUTED (see SuperLU_MatInputMode) */
  SOLVEstruct_t           SOLVEstruct;            /* communication structure for the distributed solve */
  fact_t                  FactPattern;            /* SamePattern or SamePattern_SameRowPerm for refactorizations */
  MPI_Comm                comm_superlu;           /* private duplicate of the matrix communicator */
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;                   /* numerical values handed to SuperLU_DIST */
#else
  double                  *val;                   /* numerical values handed to SuperLU_DIST */
#endif

  /* A few function pointers for inheritance: the AIJ implementations saved so
     they can be invoked from the SuperLU_DIST wrappers and restored on convert-back */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;
51 
52 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
53 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
54 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
55 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
56 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
57 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
58 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
59 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
60 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *);
61 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
/*
   Converts a MATSUPERLU_DIST matrix back to its underlying SeqAIJ/MPIAIJ type
   by restoring the function pointers saved in the Mat_SuperLU_DIST context and
   freeing that context.  With MAT_INITIAL_MATRIX a duplicate of A is converted;
   with MAT_REUSE_MATRIX the conversion is done in place on *newmat.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode   ierr;
  Mat              B=*newmat;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* Reset the original function pointers */
  B->ops->duplicate        = lu->MatDuplicate;
  B->ops->view             = lu->MatView;
  B->ops->assemblyend      = lu->MatAssemblyEnd;
  B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
  B->ops->destroy          = lu->MatDestroy;
  /* NOTE(review): lu is A's context; in the MAT_INITIAL_MATRIX case the duplicate B
     may carry its own spptr copy that is not freed here -- verify against
     MatDuplicate_SuperLU_DIST */
  ierr     = PetscFree(lu);CHKERRQ(ierr);
  A->spptr = PETSC_NULL;

  /* Remove the conversion callbacks composed by MatConvert_AIJ_SuperLU_DIST() */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
EXTERN_C_END
94 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_SuperLU_DIST"
/*
   Releases the SuperLU_DIST storage (matrix, LU factors, scalings, process grid
   and the private communicator) when this context owns it, then converts the
   matrix back to its underlying AIJ type (which frees the context) and invokes
   the restored AIJ destroy routine.
*/
PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    if (lu->MatInputMode == GLOBAL) {
      Destroy_CompCol_Matrix_dist(&lu->A_sup);
    } else {
      Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
      /* the SOLVEstruct is only set up by the distributed-input solve path */
      if ( lu->options.SolveInitialized ) {
#if defined(PETSC_USE_COMPLEX)
        zSolveFinalize(&lu->options, &lu->SOLVEstruct);
#else
        dSolveFinalize(&lu->options, &lu->SOLVEstruct);
#endif
      }
    }
    Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
    ScalePermstructFree(&lu->ScalePermstruct);
    LUstructFree(&lu->LUstruct);

    /* Release the SuperLU_DIST process grid. */
    superlu_gridexit(&lu->grid);

    ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
  }

  /* Convert back to AIJ (restores ops and frees lu), then destroy as AIJ */
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  } else {
    ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  }
  ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
137 
138 #undef __FUNCT__
139 #define __FUNCT__ "MatSolve_SuperLU_DIST"
140 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
141 {
142   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
143   PetscErrorCode   ierr;
144   PetscMPIInt      size;
145   PetscInt         m=A->rmap.N, N=A->cmap.N;
146   SuperLUStat_t    stat;
147   double           berr[1];
148   PetscScalar      *bptr;
149   PetscInt         info, nrhs=1;
150   Vec              x_seq;
151   IS               iden;
152   VecScatter       scat;
153 
154   PetscFunctionBegin;
155   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
156   if (size > 1) {
157     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
158       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
159       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
160       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
161       ierr = ISDestroy(iden);CHKERRQ(ierr);
162 
163       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
164       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
165       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
166     } else { /* distributed mat input */
167       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
168       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
169     }
170   } else { /* size == 1 */
171     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
172     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
173   }
174 
175   if (lu->options.Fact != FACTORED) SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
176 
177   PStatInit(&stat);        /* Initialize the statistics variables. */
178   if (lu->MatInputMode == GLOBAL) {
179 #if defined(PETSC_USE_COMPLEX)
180     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
181                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
182 #else
183     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
184                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
185 #endif
186   } else { /* distributed mat input */
187 #if defined(PETSC_USE_COMPLEX)
188     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
189 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
190     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
191 #else
192     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
193 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
194     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
195 #endif
196   }
197   if (lu->options.PrintStat) {
198      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
199   }
200   PStatFree(&stat);
201 
202   if (size > 1) {
203     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
204       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
205       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
206       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
207       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
208       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
209     } else {
210       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
211     }
212   } else {
213     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
214   }
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
220 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
221 {
222   Mat              *tseq,A_seq = PETSC_NULL;
223   Mat_SeqAIJ       *aa,*bb;
224   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
225   PetscErrorCode   ierr;
226   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
227                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
228   PetscMPIInt      size,rank;
229   SuperLUStat_t    stat;
230   double           *berr=0;
231   IS               isrow;
232   PetscLogDouble   time0,time,time_min,time_max;
233   Mat              F_diag=PETSC_NULL;
234 #if defined(PETSC_USE_COMPLEX)
235   doublecomplex    *av, *bv;
236 #else
237   double           *av, *bv;
238 #endif
239 
240   PetscFunctionBegin;
241   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
242   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
243 
244   if (lu->options.PrintStat) { /* collect time for mat conversion */
245     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
246     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
247   }
248 
249   if (lu->MatInputMode == GLOBAL) { /* global mat input */
250     if (size > 1) { /* convert mpi A to seq mat A */
251       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
252       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
253       ierr = ISDestroy(isrow);CHKERRQ(ierr);
254 
255       A_seq = *tseq;
256       ierr = PetscFree(tseq);CHKERRQ(ierr);
257       aa =  (Mat_SeqAIJ*)A_seq->data;
258     } else {
259       aa =  (Mat_SeqAIJ*)A->data;
260     }
261 
262     /* Convert Petsc NR matrix to SuperLU_DIST NC.
263        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
264     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
265       if (lu->FactPattern == SamePattern_SameRowPerm){
266         Destroy_CompCol_Matrix_dist(&lu->A_sup);
267         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
268         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
269       } else {
270         Destroy_CompCol_Matrix_dist(&lu->A_sup);
271         Destroy_LU(N, &lu->grid, &lu->LUstruct);
272         lu->options.Fact = SamePattern;
273       }
274     }
275 #if defined(PETSC_USE_COMPLEX)
276     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
277 #else
278     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
279 #endif
280 
281     /* Create compressed column matrix A_sup. */
282 #if defined(PETSC_USE_COMPLEX)
283     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
284 #else
285     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
286 #endif
287   } else { /* distributed mat input */
288     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
289     aa=(Mat_SeqAIJ*)(mat->A)->data;
290     bb=(Mat_SeqAIJ*)(mat->B)->data;
291     ai=aa->i; aj=aa->j;
292     bi=bb->i; bj=bb->j;
293 #if defined(PETSC_USE_COMPLEX)
294     av=(doublecomplex*)aa->a;
295     bv=(doublecomplex*)bb->a;
296 #else
297     av=aa->a;
298     bv=bb->a;
299 #endif
300     rstart = A->rmap.rstart;
301     nz     = aa->nz + bb->nz;
302     garray = mat->garray;
303 
304     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
305 #if defined(PETSC_USE_COMPLEX)
306       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
307 #else
308       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309 #endif
310     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
311       if (lu->FactPattern == SamePattern_SameRowPerm){
312         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
313         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
314       } else {
315         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
316         lu->options.Fact = SamePattern;
317       }
318     }
319     nz = 0; irow = rstart;
320     for ( i=0; i<m; i++ ) {
321       lu->row[i] = nz;
322       countA = ai[i+1] - ai[i];
323       countB = bi[i+1] - bi[i];
324       ajj = aj + ai[i];  /* ptr to the beginning of this row */
325       bjj = bj + bi[i];
326 
327       /* B part, smaller col index */
328       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
329       jB = 0;
330       for (j=0; j<countB; j++){
331         jcol = garray[bjj[j]];
332         if (jcol > colA_start) {
333           jB = j;
334           break;
335         }
336         lu->col[nz] = jcol;
337         lu->val[nz++] = *bv++;
338         if (j==countB-1) jB = countB;
339       }
340 
341       /* A part */
342       for (j=0; j<countA; j++){
343         lu->col[nz] = rstart + ajj[j];
344         lu->val[nz++] = *av++;
345       }
346 
347       /* B part, larger col index */
348       for (j=jB; j<countB; j++){
349         lu->col[nz] = garray[bjj[j]];
350         lu->val[nz++] = *bv++;
351       }
352     }
353     lu->row[m] = nz;
354 #if defined(PETSC_USE_COMPLEX)
355     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
356 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
357 #else
358     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
359 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
360 #endif
361   }
362   if (lu->options.PrintStat) {
363     ierr = PetscGetTime(&time);CHKERRQ(ierr);
364     time0 = time - time0;
365   }
366 
367   /* Factor the matrix. */
368   PStatInit(&stat);   /* Initialize the statistics variables. */
369 
370   if (lu->MatInputMode == GLOBAL) { /* global mat input */
371 #if defined(PETSC_USE_COMPLEX)
372     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
373                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
374 #else
375     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
376                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
377 #endif
378   } else { /* distributed mat input */
379 #if defined(PETSC_USE_COMPLEX)
380     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
381 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
382     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
383 #else
384     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
385 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
386     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
387 #endif
388   }
389 
390   if (lu->MatInputMode == GLOBAL && size > 1){
391     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
392   }
393 
394   if (lu->options.PrintStat) {
395     if (size > 1){
396       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
397       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
398       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
399       time = time/size; /* average time */
400       if (!rank) {
401         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n                              %g / %g / %g\n",time_max,time_min,time);CHKERRQ(ierr);
402       }
403     } else {
404       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n    %g\n",time0);CHKERRQ(ierr);
405     }
406     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
407   }
408   PStatFree(&stat);
409   if (size > 1){
410     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
411     F_diag->assembled = PETSC_TRUE;
412   }
413   (*F)->assembled  = PETSC_TRUE;
414   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
415   PetscFunctionReturn(0);
416 }
417 
418 /* Note the Petsc r and c permutations are ignored */
419 #undef __FUNCT__
420 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
421 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
422 {
423   Mat               B;
424   Mat_SuperLU_DIST  *lu;
425   PetscErrorCode    ierr;
426   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
427   PetscMPIInt       size;
428   superlu_options_t options;
429   PetscTruth        flg;
430   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
431   const char        *prtype[] = {"LargeDiag","NATURAL"};
432   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
433 
434   PetscFunctionBegin;
435   /* Create the factorization matrix */
436   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
437   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
438   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
439   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
440   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
441 
442   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
443   B->ops->solve            = MatSolve_SuperLU_DIST;
444   B->factor                = FACTOR_LU;
445 
446   lu = (Mat_SuperLU_DIST*)(B->spptr);
447 
448   /*   Set the default input options:
449         options.Fact = DOFACT;
450         options.Equil = YES;
451         options.ColPerm = MMD_AT_PLUS_A;
452         options.RowPerm = LargeDiag;
453         options.ReplaceTinyPivot = YES;
454         options.Trans = NOTRANS;
455         options.IterRefine = DOUBLE;
456         options.SolveInitialized = NO;
457         options.RefineInitialized = NO;
458         options.PrintStat = YES;
459   */
460   set_default_options_dist(&options);
461 
462   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
463   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
464   /* Default num of process columns and rows */
465   lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size));
466   if (!lu->npcol) lu->npcol = 1;
467   while (lu->npcol > 0) {
468     lu->nprow = (PetscMPIInt)(size/lu->npcol);
469     if (size == lu->nprow * lu->npcol) break;
470     lu->npcol --;
471   }
472 
473   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
474     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
475     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
476     if (size != lu->nprow * lu->npcol)
477       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
478 
479     lu->MatInputMode = DISTRIBUTED;
480     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
481     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
482 
483     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
484     if (!flg) {
485       options.Equil = NO;
486     }
487 
488     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
489     if (flg) {
490       switch (indx) {
491       case 0:
492         options.RowPerm = LargeDiag;
493         break;
494       case 1:
495         options.RowPerm = NOROWPERM;
496         break;
497       }
498     }
499 
500     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr);
501     if (flg) {
502       switch (indx) {
503       case 0:
504         options.ColPerm = MMD_AT_PLUS_A;
505         break;
506       case 1:
507         options.ColPerm = NATURAL;
508         break;
509       case 2:
510         options.ColPerm = MMD_ATA;
511         break;
512       }
513     }
514 
515     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
516     if (!flg) {
517       options.ReplaceTinyPivot = NO;
518     }
519 
520     lu->FactPattern = SamePattern;
521     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
522     if (flg) {
523       switch (indx) {
524       case 0:
525         lu->FactPattern = SamePattern;
526         break;
527       case 1:
528         lu->FactPattern = SamePattern_SameRowPerm;
529         break;
530       }
531     }
532 
533     options.IterRefine = NOREFINE;
534     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
535     if (flg) {
536       options.IterRefine = DOUBLE;
537     }
538 
539     if (PetscLogPrintInfo) {
540       options.PrintStat = YES;
541     } else {
542       options.PrintStat = NO;
543     }
544     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
545                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
546   PetscOptionsEnd();
547 
548   /* Initialize the SuperLU process grid. */
549   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
550 
551   /* Initialize ScalePermstruct and LUstruct. */
552   ScalePermstructInit(M, N, &lu->ScalePermstruct);
553   LUstructInit(M, N, &lu->LUstruct);
554 
555   lu->options             = options;
556   lu->options.Fact        = DOFACT;
557   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
558   *F = B;
559   PetscFunctionReturn(0);
560 }
561 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
/*
   Runs the inherited (AIJ) assembly-end routine, then installs the SuperLU_DIST
   symbolic factorization on the matrix, saving the current one so it can be
   restored on conversion back to AIJ.  The save must occur before the
   overwrite, hence the statement order below.
*/
PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);

  PetscFunctionBegin;
  ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
  lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
  A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  PetscFunctionReturn(0);
}
574 
575 #undef __FUNCT__
576 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
577 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
578 {
579   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
580   superlu_options_t options;
581   PetscErrorCode    ierr;
582 
583   PetscFunctionBegin;
584   /* check if matrix is superlu_dist type */
585   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
586 
587   options = lu->options;
588   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
589   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
590   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
591   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
592   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
593   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
594   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
595   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
596   if (options.ColPerm == NATURAL) {
597     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
598   } else if (options.ColPerm == MMD_AT_PLUS_A) {
599     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
600   } else if (options.ColPerm == MMD_ATA) {
601     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
602   } else {
603     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
604   }
605 
606   if (lu->FactPattern == SamePattern){
607     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
608   } else {
609     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
610   }
611   PetscFunctionReturn(0);
612 }
613 
614 #undef __FUNCT__
615 #define __FUNCT__ "MatView_SuperLU_DIST"
616 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
617 {
618   PetscErrorCode    ierr;
619   PetscTruth        iascii;
620   PetscViewerFormat format;
621   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
622 
623   PetscFunctionBegin;
624   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
625 
626   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
627   if (iascii) {
628     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
629     if (format == PETSC_VIEWER_ASCII_INFO) {
630       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
631     }
632   }
633   PetscFunctionReturn(0);
634 }
635 
636 
637 EXTERN_C_BEGIN
638 #undef __FUNCT__
639 #define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
640 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
641 {
642   /* This routine is only called to convert to MATSUPERLU_DIST */
643   /* from MATSEQAIJ if A has a single process communicator */
644   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
645   PetscErrorCode   ierr;
646   PetscMPIInt      size;
647   MPI_Comm         comm;
648   Mat              B=*newmat;
649   Mat_SuperLU_DIST *lu;
650 
651   PetscFunctionBegin;
652   ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
653   if (reuse == MAT_INITIAL_MATRIX) {
654     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
655     lu->MatDuplicate         = B->ops->duplicate;
656     lu->MatView              = B->ops->view;
657     lu->MatAssemblyEnd       = B->ops->assemblyend;
658     lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
659     lu->MatDestroy           = B->ops->destroy;
660   } else {
661     lu->MatDuplicate         = A->ops->duplicate;
662     lu->MatView              = A->ops->view;
663     lu->MatAssemblyEnd       = A->ops->assemblyend;
664     lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
665     lu->MatDestroy           = A->ops->destroy;
666   }
667   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
668 
669   B->spptr                 = (void*)lu;
670   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
671   B->ops->view             = MatView_SuperLU_DIST;
672   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
673   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
674   B->ops->destroy          = MatDestroy_SuperLU_DIST;
675 
676   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
677   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
678   if (size == 1) {
679     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
680     "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
681     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
682     "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
683   } else {
684     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
685                                              "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
686     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
687                                              "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
688   }
689   ierr = PetscInfo(A,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
690   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
691   *newmat = B;
692   PetscFunctionReturn(0);
693 }
694 EXTERN_C_END
695 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_SuperLU_DIST"
/*
   Duplicates the matrix via the inherited (AIJ) duplicate routine, then copies
   this matrix's Mat_SuperLU_DIST context into the duplicate's spptr.
   NOTE(review): PetscMemcpy makes a shallow copy -- both matrices then share
   pointers such as comm_superlu, grid and LU data; verify that only one of
   them carries CleanUpSuperLU_Dist before both are destroyed.
*/
PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
  PetscErrorCode   ierr;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

  PetscFunctionBegin;
  ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
  ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
707 
708 /*MC
709   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
710   via the external package SuperLU_DIST.
711 
712   If SuperLU_DIST is installed (see the manual for
713   instructions on how to declare the existence of external packages),
714   a matrix type can be constructed which invokes SuperLU_DIST solvers.
715   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
716   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
717   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
718 
719   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
720   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
721   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
722   for communicators controlling multiple processes.  It is recommended that you call both of
723   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
724   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
725   without data copy; but this MUST be called AFTER the matrix values are set.
726 
727 
728 
729   Options Database Keys:
730 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
731 . -mat_superlu_dist_r <n> - number of rows in processor partition
732 . -mat_superlu_dist_c <n> - number of columns in processor partition
733 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
734 . -mat_superlu_dist_equil - equilibrate the matrix
735 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
736 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
737 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern> - sparsity-pattern reuse strategy for repeated factorizations; choose one of SamePattern, SamePattern_SameRowPerm
739 . -mat_superlu_dist_iterrefine - use iterative refinement
740 - -mat_superlu_dist_statprint - print factorization information
741 
742    Level: beginner
743 
744 .seealso: PCLU
745 M*/
746 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SuperLU_DIST"
/*
   MatCreate_SuperLU_DIST - type constructor for MATSUPERLU_DIST.  First builds
   the matrix as SeqAIJ (single process) or MPIAIJ (parallel), then converts it
   in place to the SuperLU_DIST type so that it inherits the AIJ behavior.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    /*  A_diag = 0x0 ???  -- do we need it?
    Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
    ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
    */
  }
  /* in-place conversion: attaches the Mat_SuperLU_DIST context to A */
  ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
770 
771