xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision 09f3b4e5628a00a1eaf17d80982cfbcc515cc9c1)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   MatStructure            flg;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
79   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
80   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
81 
82   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
83   *newmat = B;
84   PetscFunctionReturn(0);
85 }
86 EXTERN_C_END
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
91 {
92   PetscErrorCode   ierr;
93   PetscMPIInt      size;
94   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
95 
96   PetscFunctionBegin;
97   if (lu->CleanUpSuperLU_Dist) {
98     /* Deallocate SuperLU_DIST storage */
99     if (lu->MatInputMode == GLOBAL) {
100       Destroy_CompCol_Matrix_dist(&lu->A_sup);
101     } else {
102       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
103       if ( lu->options.SolveInitialized ) {
104 #if defined(PETSC_USE_COMPLEX)
105         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #else
107         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
108 #endif
109       }
110     }
111     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
112     ScalePermstructFree(&lu->ScalePermstruct);
113     LUstructFree(&lu->LUstruct);
114 
115     /* Release the SuperLU_DIST process grid. */
116     superlu_gridexit(&lu->grid);
117 
118     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
119   }
120 
121   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
122   if (size == 1) {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   } else {
125     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
126   }
127   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
128 
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSolve_SuperLU_DIST"
134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
135 {
136   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
137   PetscErrorCode   ierr;
138   PetscMPIInt      size;
139   PetscInt         m=A->M, N=A->N;
140   SuperLUStat_t    stat;
141   double           berr[1];
142   PetscScalar      *bptr;
143   PetscInt         info, nrhs=1;
144   Vec              x_seq;
145   IS               iden;
146   VecScatter       scat;
147 
148   PetscFunctionBegin;
149   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
150   if (size > 1) {
151     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
152       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
153       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
154       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
155       ierr = ISDestroy(iden);CHKERRQ(ierr);
156 
157       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
158       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
159       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
160     } else { /* distributed mat input */
161       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
162       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
163     }
164   } else { /* size == 1 */
165     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
166     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
167   }
168 
169   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
170 
171   PStatInit(&stat);        /* Initialize the statistics variables. */
172   if (lu->MatInputMode == GLOBAL) {
173 #if defined(PETSC_USE_COMPLEX)
174     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
175                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
176 #else
177     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
178                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
179 #endif
180   } else { /* distributed mat input */
181 #if defined(PETSC_USE_COMPLEX)
182     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
183 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
184     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
185 #else
186     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
187 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
188     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
189 #endif
190   }
191   if (lu->options.PrintStat) {
192      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
193   }
194   PStatFree(&stat);
195 
196   if (size > 1) {
197     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
198       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
199       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
200       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
201       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
202       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
203     } else {
204       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
205     }
206   } else {
207     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
208   }
209   PetscFunctionReturn(0);
210 }
211 
212 #undef __FUNCT__
213 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
214 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
215 {
216   Mat              *tseq,A_seq = PETSC_NULL;
217   Mat_SeqAIJ       *aa,*bb;
218   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
219   PetscErrorCode   ierr;
220   PetscInt         M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
221                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
222   PetscMPIInt      size,rank;
223   SuperLUStat_t    stat;
224   double           *berr=0;
225   IS               isrow;
226   PetscLogDouble   time0,time,time_min,time_max;
227   Mat              F_diag=PETSC_NULL;
228 #if defined(PETSC_USE_COMPLEX)
229   doublecomplex    *av, *bv;
230 #else
231   double           *av, *bv;
232 #endif
233 
234   PetscFunctionBegin;
235   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
236   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
237 
238   if (lu->options.PrintStat) { /* collect time for mat conversion */
239     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
240     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
241   }
242 
243   if (lu->MatInputMode == GLOBAL) { /* global mat input */
244     if (size > 1) { /* convert mpi A to seq mat A */
245       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
246       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
247       ierr = ISDestroy(isrow);CHKERRQ(ierr);
248 
249       A_seq = *tseq;
250       ierr = PetscFree(tseq);CHKERRQ(ierr);
251       aa =  (Mat_SeqAIJ*)A_seq->data;
252     } else {
253       aa =  (Mat_SeqAIJ*)A->data;
254     }
255 
256     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
257     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
258 #if defined(PETSC_USE_COMPLEX)
259       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
260 #else
261       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
262 #endif
263     } else { /* successive numeric factorization, sparsity pattern is reused. */
264       Destroy_CompCol_Matrix_dist(&lu->A_sup);
265       Destroy_LU(N, &lu->grid, &lu->LUstruct);
266       lu->options.Fact = SamePattern;
267     }
268 #if defined(PETSC_USE_COMPLEX)
269     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
270 #else
271     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
272 #endif
273 
274     /* Create compressed column matrix A_sup. */
275 #if defined(PETSC_USE_COMPLEX)
276     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
277 #else
278     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
279 #endif
280   } else { /* distributed mat input */
281     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
282     aa=(Mat_SeqAIJ*)(mat->A)->data;
283     bb=(Mat_SeqAIJ*)(mat->B)->data;
284     ai=aa->i; aj=aa->j;
285     bi=bb->i; bj=bb->j;
286 #if defined(PETSC_USE_COMPLEX)
287     av=(doublecomplex*)aa->a;
288     bv=(doublecomplex*)bb->a;
289 #else
290     av=aa->a;
291     bv=bb->a;
292 #endif
293     rstart = mat->rstart;
294     nz     = aa->nz + bb->nz;
295     garray = mat->garray;
296     rstart = mat->rstart;
297 
298     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
299 #if defined(PETSC_USE_COMPLEX)
300       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #else
302       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
303 #endif
304     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
305       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
306       Destroy_LU(N, &lu->grid, &lu->LUstruct);
307       lu->options.Fact = SamePattern;
308     }
309     nz = 0; irow = mat->rstart;
310     for ( i=0; i<m; i++ ) {
311       lu->row[i] = nz;
312       countA = ai[i+1] - ai[i];
313       countB = bi[i+1] - bi[i];
314       ajj = aj + ai[i];  /* ptr to the beginning of this row */
315       bjj = bj + bi[i];
316 
317       /* B part, smaller col index */
318       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
319       jB = 0;
320       for (j=0; j<countB; j++){
321         jcol = garray[bjj[j]];
322         if (jcol > colA_start) {
323           jB = j;
324           break;
325         }
326         lu->col[nz] = jcol;
327         lu->val[nz++] = *bv++;
328         if (j==countB-1) jB = countB;
329       }
330 
331       /* A part */
332       for (j=0; j<countA; j++){
333         lu->col[nz] = mat->rstart + ajj[j];
334         lu->val[nz++] = *av++;
335       }
336 
337       /* B part, larger col index */
338       for (j=jB; j<countB; j++){
339         lu->col[nz] = garray[bjj[j]];
340         lu->val[nz++] = *bv++;
341       }
342     }
343     lu->row[m] = nz;
344 #if defined(PETSC_USE_COMPLEX)
345     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
346 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
347 #else
348     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
349 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
350 #endif
351   }
352   if (lu->options.PrintStat) {
353     ierr = PetscGetTime(&time);CHKERRQ(ierr);
354     time0 = time - time0;
355   }
356 
357   /* Factor the matrix. */
358   PStatInit(&stat);   /* Initialize the statistics variables. */
359 
360   if (lu->MatInputMode == GLOBAL) { /* global mat input */
361 #if defined(PETSC_USE_COMPLEX)
362     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
363                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
364 #else
365     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
366                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
367 #endif
368   } else { /* distributed mat input */
369 #if defined(PETSC_USE_COMPLEX)
370     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
371 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
372     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
373 #else
374     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
375 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
376     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
377 #endif
378   }
379 
380   if (lu->MatInputMode == GLOBAL && size > 1){
381     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
382   }
383 
384   if (lu->options.PrintStat) {
385     if (size > 1){
386       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
387       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
388       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
389       time = time/size; /* average time */
390       if (!rank)
391         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
392                               %g / %g / %g\n",time_max,time_min,time);
393     } else {
394       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
395                               %g\n",time0);
396     }
397 
398     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
399   }
400   PStatFree(&stat);
401   if (size > 1){
402     F_diag = ((Mat_MPIAIJ *)(*F)->data)->A;
403     F_diag->assembled = PETSC_TRUE;
404   }
405   (*F)->assembled   = PETSC_TRUE;
406   lu->flg           = SAME_NONZERO_PATTERN;
407   PetscFunctionReturn(0);
408 }
409 
410 /* Note the Petsc r and c permutations are ignored */
411 #undef __FUNCT__
412 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
413 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
414 {
415   Mat               B;
416   Mat_SuperLU_DIST  *lu;
417   PetscErrorCode    ierr;
418   PetscInt          M=A->M,N=A->N,indx;
419   PetscMPIInt       size;
420   superlu_options_t options;
421   PetscTruth        flg;
422   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA"};
423   const char        *prtype[] = {"LargeDiag","NATURAL"};
424 
425   PetscFunctionBegin;
426   /* Create the factorization matrix */
427   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
428   ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr);
429   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
430   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
431   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
432 
433   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
434   B->ops->solve            = MatSolve_SuperLU_DIST;
435   B->factor                = FACTOR_LU;
436 
437   lu = (Mat_SuperLU_DIST*)(B->spptr);
438 
439   /* Set the input options */
440   set_default_options_dist(&options);
441 
442   lu->MatInputMode = GLOBAL;
443   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
444 
445   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
446   lu->nprow = size/2;               /* Default process rows.      */
447   if (!lu->nprow) lu->nprow = 1;
448   lu->npcol = size/lu->nprow;           /* Default process columns.   */
449 
450   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
451 
452     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
453     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
454     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
455 
456     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
457     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
458 
459     ierr = PetscOptionsTruth("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
460     if (!flg) {
461       options.Equil = NO;
462     }
463 
464     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
465     if (flg) {
466       switch (indx) {
467       case 0:
468         options.RowPerm = LargeDiag;
469         break;
470       case 1:
471         options.RowPerm = NOROWPERM;
472         break;
473       }
474     }
475 
476     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
477     if (flg) {
478       switch (indx) {
479       case 0:
480         options.ColPerm = MMD_AT_PLUS_A;
481         break;
482       case 1:
483         options.ColPerm = NATURAL;
484         break;
485       case 2:
486         options.ColPerm = MMD_ATA;
487         break;
488       }
489     }
490 
491     ierr = PetscOptionsTruth("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
492     if (!flg) {
493       options.ReplaceTinyPivot = NO;
494     }
495 
496     options.IterRefine = NOREFINE;
497     ierr = PetscOptionsTruth("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
498     if (flg) {
499       options.IterRefine = DOUBLE;
500     }
501 
502     if (PetscLogPrintInfo) {
503       options.PrintStat = YES;
504     } else {
505       options.PrintStat = NO;
506     }
507     ierr = PetscOptionsTruth("-mat_superlu_dist_statprint","Print factorization information","None",
508                               (PetscTruth)options.PrintStat,(PetscTruth*)&options.PrintStat,0);CHKERRQ(ierr);
509   PetscOptionsEnd();
510 
511   /* Initialize the SuperLU process grid. */
512   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
513 
514   /* Initialize ScalePermstruct and LUstruct. */
515   ScalePermstructInit(M, N, &lu->ScalePermstruct);
516   LUstructInit(M, N, &lu->LUstruct);
517 
518   lu->options            = options;
519   lu->flg                = DIFFERENT_NONZERO_PATTERN;
520   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
521   *F = B;
522 
523   PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
528 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
529   PetscErrorCode   ierr;
530   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
531 
532   PetscFunctionBegin;
533   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
534   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
535   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
536   PetscFunctionReturn(0);
537 }
538 
539 #undef __FUNCT__
540 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
541 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
542 {
543   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
544   superlu_options_t options;
545   PetscErrorCode    ierr;
546 
547   PetscFunctionBegin;
548   /* check if matrix is superlu_dist type */
549   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
550 
551   options = lu->options;
552   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
553   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
554   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
555   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr);
556   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
557   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
558   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
559   if (options.ColPerm == NATURAL) {
560     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
561   } else if (options.ColPerm == MMD_AT_PLUS_A) {
562     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
563   } else if (options.ColPerm == MMD_ATA) {
564     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
565   } else {
566     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
567   }
568   PetscFunctionReturn(0);
569 }
570 
571 #undef __FUNCT__
572 #define __FUNCT__ "MatView_SuperLU_DIST"
573 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
574 {
575   PetscErrorCode    ierr;
576   PetscTruth        iascii;
577   PetscViewerFormat format;
578   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
579 
580   PetscFunctionBegin;
581   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
582 
583   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
584   if (iascii) {
585     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
586     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
587       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
588     }
589   }
590   PetscFunctionReturn(0);
591 }
592 
593 
594 EXTERN_C_BEGIN
595 #undef __FUNCT__
596 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
597 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,MatType type,MatReuse reuse,Mat *newmat)
598 {
599   /* This routine is only called to convert to MATSUPERLU_DIST */
600   /* from MATSEQAIJ if A has a single process communicator */
601   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
602   PetscErrorCode   ierr;
603   PetscMPIInt      size;
604   MPI_Comm         comm;
605   Mat              B=*newmat;
606   Mat_SuperLU_DIST *lu;
607 
608   PetscFunctionBegin;
609   if (reuse == MAT_INITIAL_MATRIX) {
610     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
611   }
612 
613   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
614   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
615 
616   lu->MatDuplicate         = A->ops->duplicate;
617   lu->MatView              = A->ops->view;
618   lu->MatAssemblyEnd       = A->ops->assemblyend;
619   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
620   lu->MatDestroy           = A->ops->destroy;
621   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
622 
623   B->spptr                 = (void*)lu;
624   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
625   B->ops->view             = MatView_SuperLU_DIST;
626   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
627   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
628   B->ops->destroy          = MatDestroy_SuperLU_DIST;
629   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
630   if (size == 1) {
631     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
632                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
633     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
634                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
635   } else {
636     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
637                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
638     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
639                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
640   }
641   ierr = PetscVerboseInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr);
642   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
643   *newmat = B;
644   PetscFunctionReturn(0);
645 }
646 EXTERN_C_END
647 
648 #undef __FUNCT__
649 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
650 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
651   PetscErrorCode   ierr;
652   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
653 
654   PetscFunctionBegin;
655   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
656   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
657   PetscFunctionReturn(0);
658 }
659 
660 /*MC
661   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
662   via the external package SuperLU_DIST.
663 
664   If SuperLU_DIST is installed (see the manual for
665   instructions on how to declare the existence of external packages),
666   a matrix type can be constructed which invokes SuperLU_DIST solvers.
667   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
668 
669   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
670   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
671   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
672   for communicators controlling multiple processes.  It is recommended that you call both of
673   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
674   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
675   without data copy.
676 
677   Options Database Keys:
678 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
679 . -mat_superlu_dist_r <n> - number of rows in processor partition
680 . -mat_superlu_dist_c <n> - number of columns in processor partition
681 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
682 . -mat_superlu_dist_equil - equilibrate the matrix
683 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
684 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,NATURAL> - column permutation
685 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
686 . -mat_superlu_dist_iterrefine - use iterative refinement
687 - -mat_superlu_dist_statprint - print factorization information
688 
689    Level: beginner
690 
691 .seealso: PCLU
692 M*/
693 
694 EXTERN_C_BEGIN
695 #undef __FUNCT__
696 #define __FUNCT__ "MatCreate_SuperLU_DIST"
697 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
698 {
699   PetscErrorCode ierr;
700   PetscMPIInt    size;
701   Mat            A_diag;
702 
703   PetscFunctionBegin;
704   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
705   /*   and SuperLU_DIST types */
706   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
707   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
708   if (size == 1) {
709     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
710   } else {
711     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
712     A_diag = ((Mat_MPIAIJ *)A->data)->A;
713     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
714   }
715   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
716   PetscFunctionReturn(0);
717 }
718 EXTERN_C_END
719 
720