xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision bd920abb044c96993aaee00a1f0a41db581d8359)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get around weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
/* How the matrix is handed to SuperLU_DIST: replicated on every process
   (GLOBAL, compressed-column) or as local compressed rows (DISTRIBUTED). */
typedef enum { GLOBAL,DISTRIBUTED
} SuperLU_MatInputMode;

/* Per-matrix context attached to Mat->spptr for MATSUPERLU_DIST matrices. */
typedef struct {
  int_t                   nprow,npcol,*row,*col;  /* process grid shape; index arrays of A_sup (allocated by SuperLU_DIST routines) */
  gridinfo_t              grid;                   /* SuperLU_DIST 2D process grid */
  superlu_options_t       options;                /* solver options; options.Fact tracks the factorization state */
  SuperMatrix             A_sup;                  /* the matrix in SuperLU_DIST format (SLU_NC or SLU_NR_loc) */
  ScalePermstruct_t       ScalePermstruct;        /* row/column scalings and permutations */
  LUstruct_t              LUstruct;               /* the distributed L and U factors */
  int                     StatPrint;              /* NOTE(review): appears unused in this file; options.PrintStat is what is consulted */
  int                     MatInputMode;           /* GLOBAL or DISTRIBUTED (SuperLU_MatInputMode values) */
  SOLVEstruct_t           SOLVEstruct;            /* communication structure for the distributed triangular solves */
  fact_t                  FactPattern;            /* SamePattern or SamePattern_SameRowPerm, used on refactorization */
  MPI_Comm                comm_superlu;           /* private dup of the matrix communicator, owned by this context */
#if defined(PETSC_USE_COMPLEX)
  doublecomplex           *val;                   /* numerical values of A_sup (complex build) */
#else
  double                  *val;                   /* numerical values of A_sup (real build) */
#endif

  /* A few function pointers for inheritance: the AIJ implementations saved
     at conversion time so they can be restored/delegated to later. */
  PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
  PetscErrorCode (*MatView)(Mat,PetscViewer);
  PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
  PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
  PetscErrorCode (*MatDestroy)(Mat);

  /* Flag to clean up (non-global) SuperLU objects during Destroy */
  PetscTruth CleanUpSuperLU_Dist;
} Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 extern PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat,PetscViewer);
55 extern PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat,MatFactorInfo *,Mat *);
56 extern PetscErrorCode MatDestroy_SuperLU_DIST(Mat);
57 extern PetscErrorCode MatView_SuperLU_DIST(Mat,PetscViewer);
58 extern PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat,MatAssemblyType);
59 extern PetscErrorCode MatSolve_SuperLU_DIST(Mat,Vec,Vec);
60 extern PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat,IS,IS,MatFactorInfo *,Mat *);
61 extern PetscErrorCode MatDuplicate_SuperLU_DIST(Mat, MatDuplicateOption, Mat *);
62 
63 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_SuperLU_DIST_AIJ"
/*
   MatConvert_SuperLU_DIST_AIJ - Converts a MATSUPERLU_DIST matrix back to its
   base AIJ type by restoring the function pointers saved in the
   Mat_SuperLU_DIST context and removing the composed conversion hooks.

   NOTE(review): lu is taken from A->spptr, and A->spptr is freed and zeroed
   below even when reuse == MAT_INITIAL_MATRIX (where B is a fresh duplicate
   of A) -- confirm callers expect A to lose its SuperLU_DIST context here.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_AIJ(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  PetscErrorCode   ierr;
  Mat              B=*newmat;
  Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Produce a new matrix; the in-place (MAT_REUSE_MATRIX) path converts A itself. */
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
  }
  /* Reset the original function pointers */
  B->ops->duplicate        = lu->MatDuplicate;
  B->ops->view             = lu->MatView;
  B->ops->assemblyend      = lu->MatAssemblyEnd;
  B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
  B->ops->destroy          = lu->MatDestroy;
  ierr     = PetscFree(lu);CHKERRQ(ierr);
  A->spptr = PETSC_NULL;

  /* Remove the conversion hooks installed by MatConvert_AIJ_SuperLU_DIST(). */
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
94 EXTERN_C_END
95 
96 #undef __FUNCT__
97 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
98 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
99 {
100   PetscErrorCode   ierr;
101   PetscMPIInt      size;
102   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
103 
104   PetscFunctionBegin;
105   if (lu->CleanUpSuperLU_Dist) {
106     /* Deallocate SuperLU_DIST storage */
107     if (lu->MatInputMode == GLOBAL) {
108       Destroy_CompCol_Matrix_dist(&lu->A_sup);
109     } else {
110       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
111       if ( lu->options.SolveInitialized ) {
112 #if defined(PETSC_USE_COMPLEX)
113         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
114 #else
115         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
116 #endif
117       }
118     }
119     Destroy_LU(A->cmap.N, &lu->grid, &lu->LUstruct);
120     ScalePermstructFree(&lu->ScalePermstruct);
121     LUstructFree(&lu->LUstruct);
122 
123     /* Release the SuperLU_DIST process grid. */
124     superlu_gridexit(&lu->grid);
125 
126     ierr = MPI_Comm_free(&(((PetscObject)lu)->comm_superlu));CHKERRQ(ierr);
127   }
128 
129   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
130   if (size == 1) {
131     ierr = MatConvert_SuperLU_DIST_AIJ(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
132   } else {
133     ierr = MatConvert_SuperLU_DIST_AIJ(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
134   }
135   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
136   PetscFunctionReturn(0);
137 }
138 
139 #undef __FUNCT__
140 #define __FUNCT__ "MatSolve_SuperLU_DIST"
141 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
142 {
143   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
144   PetscErrorCode   ierr;
145   PetscMPIInt      size;
146   PetscInt         m=A->rmap.N, N=A->cmap.N;
147   SuperLUStat_t    stat;
148   double           berr[1];
149   PetscScalar      *bptr;
150   PetscInt         info, nrhs=1;
151   Vec              x_seq;
152   IS               iden;
153   VecScatter       scat;
154 
155   PetscFunctionBegin;
156   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
157   if (size > 1) {
158     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
159       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
160       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
161       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
162       ierr = ISDestroy(iden);CHKERRQ(ierr);
163 
164       ierr = VecScatterBegin(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
165       ierr = VecScatterEnd(scat,b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
166       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
167     } else { /* distributed mat input */
168       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
169       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
170     }
171   } else { /* size == 1 */
172     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
173     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
174   }
175 
176   if (lu->options.Fact != FACTORED)
177     SETERRQ(PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact mush equal FACTORED");
178 
179   PStatInit(&stat);        /* Initialize the statistics variables. */
180   if (lu->MatInputMode == GLOBAL) {
181 #if defined(PETSC_USE_COMPLEX)
182     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
183                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
184 #else
185     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
186                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
187 #endif
188   } else { /* distributed mat input */
189 #if defined(PETSC_USE_COMPLEX)
190     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->rmap.N, nrhs, &lu->grid,
191 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
192     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
193 #else
194     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->rmap.N, nrhs, &lu->grid,
195 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
196     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
197 #endif
198   }
199   if (lu->options.PrintStat) {
200      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
201   }
202   PStatFree(&stat);
203 
204   if (size > 1) {
205     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
206       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
207       ierr = VecScatterBegin(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
208       ierr = VecScatterEnd(scat,x_seq,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
209       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
210       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
211     } else {
212       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
213     }
214   } else {
215     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
216   }
217   PetscFunctionReturn(0);
218 }
219 
220 #undef __FUNCT__
221 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
222 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
223 {
224   Mat              *tseq,A_seq = PETSC_NULL;
225   Mat_SeqAIJ       *aa,*bb;
226   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
227   PetscErrorCode   ierr;
228   PetscInt         M=A->rmap.N,N=A->cmap.N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
229                    m=A->rmap.n, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
230   PetscMPIInt      size,rank;
231   SuperLUStat_t    stat;
232   double           *berr=0;
233   IS               isrow;
234   PetscLogDouble   time0,time,time_min,time_max;
235   Mat              F_diag=PETSC_NULL;
236 #if defined(PETSC_USE_COMPLEX)
237   doublecomplex    *av, *bv;
238 #else
239   double           *av, *bv;
240 #endif
241 
242   PetscFunctionBegin;
243   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
244   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
245 
246   if (lu->options.PrintStat) { /* collect time for mat conversion */
247     ierr = MPI_Barrier(((PetscObject)A)->comm);CHKERRQ(ierr);
248     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
249   }
250 
251   if (lu->MatInputMode == GLOBAL) { /* global mat input */
252     if (size > 1) { /* convert mpi A to seq mat A */
253       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
254       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
255       ierr = ISDestroy(isrow);CHKERRQ(ierr);
256 
257       A_seq = *tseq;
258       ierr = PetscFree(tseq);CHKERRQ(ierr);
259       aa =  (Mat_SeqAIJ*)A_seq->data;
260     } else {
261       aa =  (Mat_SeqAIJ*)A->data;
262     }
263 
264     /* Convert Petsc NR matrix to SuperLU_DIST NC.
265        Note: memories of lu->val, col and row are allocated by CompRow_to_CompCol_dist()! */
266     if (lu->options.Fact != DOFACT) {/* successive numeric factorization, sparsity pattern is reused. */
267       if (lu->FactPattern == SamePattern_SameRowPerm){
268         Destroy_CompCol_Matrix_dist(&lu->A_sup);
269         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
270         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
271       } else {
272         Destroy_CompCol_Matrix_dist(&lu->A_sup);
273         Destroy_LU(N, &lu->grid, &lu->LUstruct);
274         lu->options.Fact = SamePattern;
275       }
276     }
277 #if defined(PETSC_USE_COMPLEX)
278     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
279 #else
280     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
281 #endif
282 
283     /* Create compressed column matrix A_sup. */
284 #if defined(PETSC_USE_COMPLEX)
285     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
286 #else
287     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
288 #endif
289   } else { /* distributed mat input */
290     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
291     aa=(Mat_SeqAIJ*)(mat->A)->data;
292     bb=(Mat_SeqAIJ*)(mat->B)->data;
293     ai=aa->i; aj=aa->j;
294     bi=bb->i; bj=bb->j;
295 #if defined(PETSC_USE_COMPLEX)
296     av=(doublecomplex*)aa->a;
297     bv=(doublecomplex*)bb->a;
298 #else
299     av=aa->a;
300     bv=bb->a;
301 #endif
302     rstart = A->rmap.rstart;
303     nz     = aa->nz + bb->nz;
304     garray = mat->garray;
305 
306     if (lu->options.Fact == DOFACT) {/* first numeric factorization */
307 #if defined(PETSC_USE_COMPLEX)
308       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
309 #else
310       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
311 #endif
312     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
313       if (lu->FactPattern == SamePattern_SameRowPerm){
314         /* Destroy_LU(N, &lu->grid, &lu->LUstruct); Crash! Comment it out does not lead to mem leak. */
315         lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
316       } else {
317         Destroy_LU(N, &lu->grid, &lu->LUstruct); /* Deallocate storage associated with the L and U matrices. */
318         lu->options.Fact = SamePattern;
319       }
320     }
321     nz = 0; irow = rstart;
322     for ( i=0; i<m; i++ ) {
323       lu->row[i] = nz;
324       countA = ai[i+1] - ai[i];
325       countB = bi[i+1] - bi[i];
326       ajj = aj + ai[i];  /* ptr to the beginning of this row */
327       bjj = bj + bi[i];
328 
329       /* B part, smaller col index */
330       colA_start = rstart + ajj[0]; /* the smallest global col index of A */
331       jB = 0;
332       for (j=0; j<countB; j++){
333         jcol = garray[bjj[j]];
334         if (jcol > colA_start) {
335           jB = j;
336           break;
337         }
338         lu->col[nz] = jcol;
339         lu->val[nz++] = *bv++;
340         if (j==countB-1) jB = countB;
341       }
342 
343       /* A part */
344       for (j=0; j<countA; j++){
345         lu->col[nz] = rstart + ajj[j];
346         lu->val[nz++] = *av++;
347       }
348 
349       /* B part, larger col index */
350       for (j=jB; j<countB; j++){
351         lu->col[nz] = garray[bjj[j]];
352         lu->val[nz++] = *bv++;
353       }
354     }
355     lu->row[m] = nz;
356 #if defined(PETSC_USE_COMPLEX)
357     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
358 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
359 #else
360     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
361 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
362 #endif
363   }
364   if (lu->options.PrintStat) {
365     ierr = PetscGetTime(&time);CHKERRQ(ierr);
366     time0 = time - time0;
367   }
368 
369   /* Factor the matrix. */
370   PStatInit(&stat);   /* Initialize the statistics variables. */
371 
372   if (lu->MatInputMode == GLOBAL) { /* global mat input */
373 #if defined(PETSC_USE_COMPLEX)
374     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
375                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
376 #else
377     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
378                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
379 #endif
380   } else { /* distributed mat input */
381 #if defined(PETSC_USE_COMPLEX)
382     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
383 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
384     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
385 #else
386     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
387 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
388     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
389 #endif
390   }
391 
392   if (lu->MatInputMode == GLOBAL && size > 1){
393     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
394   }
395 
396   if (lu->options.PrintStat) {
397     if (size > 1){
398       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,((PetscObject)A)->comm);
399       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,((PetscObject)A)->comm);
400       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,((PetscObject)A)->comm);
401       time = time/size; /* average time */
402       if (!rank)
403         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
404                               %g / %g / %g\n",time_max,time_min,time);
405     } else {
406       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
407                               %g\n",time0);
408     }
409 
410     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
411   }
412   PStatFree(&stat);
413   if (size > 1){
414     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
415     F_diag->assembled = PETSC_TRUE;
416   }
417   (*F)->assembled  = PETSC_TRUE;
418   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */
419   PetscFunctionReturn(0);
420 }
421 
422 /* Note the Petsc r and c permutations are ignored */
423 #undef __FUNCT__
424 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
425 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
426 {
427   Mat               B;
428   Mat_SuperLU_DIST  *lu;
429   PetscErrorCode    ierr;
430   PetscInt          M=A->rmap.N,N=A->cmap.N,indx;
431   PetscMPIInt       size;
432   superlu_options_t options;
433   PetscTruth        flg;
434   const char        *pctype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
435   const char        *prtype[] = {"LargeDiag","NATURAL"};
436   const char        *factPattern[] = {"SamePattern","SamePattern_SameRowPerm"};
437 
438   PetscFunctionBegin;
439   /* Create the factorization matrix */
440   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
441   ierr = MatSetSizes(B,A->rmap.n,A->cmap.n,M,N);CHKERRQ(ierr);
442   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
443   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
444   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
445 
446   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
447   B->ops->solve            = MatSolve_SuperLU_DIST;
448   B->factor                = FACTOR_LU;
449 
450   lu = (Mat_SuperLU_DIST*)(B->spptr);
451 
452   /*   Set the default input options:
453         options.Fact = DOFACT;
454         options.Equil = YES;
455         options.ColPerm = MMD_AT_PLUS_A;
456         options.RowPerm = LargeDiag;
457         options.ReplaceTinyPivot = YES;
458         options.Trans = NOTRANS;
459         options.IterRefine = DOUBLE;
460         options.SolveInitialized = NO;
461         options.RefineInitialized = NO;
462         options.PrintStat = YES;
463   */
464   set_default_options_dist(&options);
465 
466   ierr = MPI_Comm_dup(((PetscObject)A)->comm,&(((PetscObject)lu)->comm_superlu));CHKERRQ(ierr);
467   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
468   /* Default num of process columns and rows */
469   lu->npcol = (PetscMPIInt)(0.5 + sqrt((PetscReal)size));
470   if (!lu->npcol) lu->npcol = 1;
471   while (lu->npcol > 0) {
472     lu->nprow = (PetscMPIInt)(size/lu->npcol);
473     if (size == lu->nprow * lu->npcol) break;
474     lu->npcol --;
475   }
476 
477   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
478     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
479     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
480     if (size != lu->nprow * lu->npcol)
481       SETERRQ3(PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %d * npcol %d",size,lu->nprow,lu->npcol);
482 
483     lu->MatInputMode = DISTRIBUTED;
484     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
485     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
486 
487     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
488     if (!flg) {
489       options.Equil = NO;
490     }
491 
492     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
493     if (flg) {
494       switch (indx) {
495       case 0:
496         options.RowPerm = LargeDiag;
497         break;
498       case 1:
499         options.RowPerm = NOROWPERM;
500         break;
501       }
502     }
503 
504     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",pctype,3,pctype[0],&indx,&flg);CHKERRQ(ierr);
505     if (flg) {
506       switch (indx) {
507       case 0:
508         options.ColPerm = MMD_AT_PLUS_A;
509         break;
510       case 1:
511         options.ColPerm = NATURAL;
512         break;
513       case 2:
514         options.ColPerm = MMD_ATA;
515         break;
516       }
517     }
518 
519     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
520     if (!flg) {
521       options.ReplaceTinyPivot = NO;
522     }
523 
524     lu->FactPattern = SamePattern;
525     ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,2,factPattern[0],&indx,&flg);CHKERRQ(ierr);
526     if (flg) {
527       switch (indx) {
528       case 0:
529         lu->FactPattern = SamePattern;
530         break;
531       case 1:
532         lu->FactPattern = SamePattern_SameRowPerm;
533         break;
534       }
535     }
536 
537     options.IterRefine = NOREFINE;
538     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
539     if (flg) {
540       options.IterRefine = DOUBLE;
541     }
542 
543     if (PetscLogPrintInfo) {
544       options.PrintStat = YES;
545     } else {
546       options.PrintStat = NO;
547     }
548     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
549                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
550   PetscOptionsEnd();
551 
552   /* Initialize the SuperLU process grid. */
553   superlu_gridinit(((PetscObject)lu)->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
554 
555   /* Initialize ScalePermstruct and LUstruct. */
556   ScalePermstructInit(M, N, &lu->ScalePermstruct);
557   LUstructInit(M, N, &lu->LUstruct);
558 
559   lu->options             = options;
560   lu->options.Fact        = DOFACT;
561   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
562   *F = B;
563   PetscFunctionReturn(0);
564 }
565 
566 #undef __FUNCT__
567 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
568 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
569   PetscErrorCode   ierr;
570   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
571 
572   PetscFunctionBegin;
573   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
574   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
575   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
581 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
582 {
583   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
584   superlu_options_t options;
585   PetscErrorCode    ierr;
586 
587   PetscFunctionBegin;
588   /* check if matrix is superlu_dist type */
589   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
590 
591   options = lu->options;
592   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
593   ierr = PetscViewerASCIIPrintf(viewer,"  Process grid nprow %D x npcol %D \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
594   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
595   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
596   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
597   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
598   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
599   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
600   if (options.ColPerm == NATURAL) {
601     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
602   } else if (options.ColPerm == MMD_AT_PLUS_A) {
603     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
604   } else if (options.ColPerm == MMD_ATA) {
605     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
606   } else {
607     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
608   }
609 
610   if (lu->FactPattern == SamePattern){
611     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern\n");CHKERRQ(ierr);
612   } else {
613     ierr = PetscViewerASCIIPrintf(viewer,"  Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr);
614   }
615   PetscFunctionReturn(0);
616 }
617 
618 #undef __FUNCT__
619 #define __FUNCT__ "MatView_SuperLU_DIST"
620 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
621 {
622   PetscErrorCode    ierr;
623   PetscTruth        iascii;
624   PetscViewerFormat format;
625   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
626 
627   PetscFunctionBegin;
628   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
629 
630   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
631   if (iascii) {
632     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
633     if (format == PETSC_VIEWER_ASCII_INFO) {
634       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
635     }
636   }
637   PetscFunctionReturn(0);
638 }
639 
640 
641 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatConvert_AIJ_SuperLU_DIST"
/*
   MatConvert_AIJ_SuperLU_DIST - Converts a Seq/MPIAIJ matrix to
   MATSUPERLU_DIST by saving the AIJ function pointers in a new
   Mat_SuperLU_DIST context and installing the SuperLU_DIST overrides.

   NOTE(review): PetscNewLog(B,...) reads B = *newmat before the
   MAT_INITIAL_MATRIX branch assigns B via MatDuplicate() -- confirm *newmat
   is a valid object (or that logging against it is harmless) in that path.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_AIJ_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
{
  /* This routine is only called to convert to MATSUPERLU_DIST */
  /* from MATSEQAIJ if A has a single process communicator */
  /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
  PetscErrorCode   ierr;
  PetscMPIInt      size;
  MPI_Comm         comm;
  Mat              B=*newmat;
  Mat_SuperLU_DIST *lu;

  PetscFunctionBegin;
  ierr = PetscNewLog(B,Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
  if (reuse == MAT_INITIAL_MATRIX) {
    /* New matrix: duplicate A and save the duplicate's AIJ methods. */
    ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
    lu->MatDuplicate         = B->ops->duplicate;
    lu->MatView              = B->ops->view;
    lu->MatAssemblyEnd       = B->ops->assemblyend;
    lu->MatLUFactorSymbolic  = B->ops->lufactorsymbolic;
    lu->MatDestroy           = B->ops->destroy;
  } else {
    /* In-place conversion (B == A): save A's current methods. */
    lu->MatDuplicate         = A->ops->duplicate;
    lu->MatView              = A->ops->view;
    lu->MatAssemblyEnd       = A->ops->assemblyend;
    lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
    lu->MatDestroy           = A->ops->destroy;
  }
  /* No SuperLU_DIST storage exists yet; the symbolic phase sets this TRUE. */
  lu->CleanUpSuperLU_Dist  = PETSC_FALSE;

  B->spptr                 = (void*)lu;
  B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
  B->ops->view             = MatView_SuperLU_DIST;
  B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
  B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  B->ops->destroy          = MatDestroy_SuperLU_DIST;

  ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
  /* Compose the conversion hooks under the names matching the base type. */
  if (size == 1) {
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
    "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
    "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
  } else {
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
                                             "MatConvert_AIJ_SuperLU_DIST",MatConvert_AIJ_SuperLU_DIST);CHKERRQ(ierr);
    ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
                                             "MatConvert_SuperLU_DIST_AIJ",MatConvert_SuperLU_DIST_AIJ);CHKERRQ(ierr);
  }
  ierr = PetscInfo(0,"Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n");CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
  *newmat = B;
  PetscFunctionReturn(0);
}
698 EXTERN_C_END
699 
700 #undef __FUNCT__
701 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
702 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
703   PetscErrorCode   ierr;
704   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
705 
706   PetscFunctionBegin;
707   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
708   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
709   PetscFunctionReturn(0);
710 }
711 
712 /*MC
713   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
714   via the external package SuperLU_DIST.
715 
716   If SuperLU_DIST is installed (see the manual for
717   instructions on how to declare the existence of external packages),
718   a matrix type can be constructed which invokes SuperLU_DIST solvers.
719   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST) then
720   optionally call MatSeqAIJSetPreallocation() or MatMPIAIJSetPreallocation() DO NOT
721   call MatCreateSeqAIJ/MPIAIJ() directly or the preallocation information will be LOST!
722 
723   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
724   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
725   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
726   for communicators controlling multiple processes.  It is recommended that you call both of
727   the above preallocation routines for simplicity.  One can also call MatConvert() for an inplace
728   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
729   without data copy; but this MUST be called AFTER the matrix values are set.
730 
731 
732 
733   Options Database Keys:
734 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
735 . -mat_superlu_dist_r <n> - number of rows in processor partition
736 . -mat_superlu_dist_c <n> - number of columns in processor partition
737 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
738 . -mat_superlu_dist_equil - equilibrate the matrix
739 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
740 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
741 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
742 . -mat_superlu_dist_fact <SamePattern> (choose one of) SamePattern SamePattern_SameRowPerm
743 . -mat_superlu_dist_iterrefine - use iterative refinement
744 - -mat_superlu_dist_statprint - print factorization information
745 
746    Level: beginner
747 
748 .seealso: PCLU
749 M*/
750 
751 EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_SuperLU_DIST"
/*
   MatCreate_SuperLU_DIST - Type constructor registered for MATSUPERLU_DIST.
   Builds the matrix as SeqAIJ (one process) or MPIAIJ (otherwise), then
   converts it in place to MATSUPERLU_DIST.
*/
PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
  if (size == 1) {
    ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
  } else {
    ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
    /*  A_diag = 0x0 ???  -- do we need it?
    Mat A_diag = ((Mat_MPIAIJ *)A->data)->A;
    ierr = MatConvert_AIJ_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
    */
  }
  /* In-place conversion: installs the SuperLU_DIST overrides on A itself. */
  ierr = MatConvert_AIJ_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
773 EXTERN_C_END
774 
775