xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision ea2994c3e0d9cdf951e70db3ce412b4304589e7d)
1 /*
2         Provides an interface to the SuperLU_DIST_2.0 sparse solver
3 */
4 
5 #include "src/mat/impls/aij/seq/aij.h"
6 #include "src/mat/impls/aij/mpi/mpiaij.h"
7 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
8 #include "stdlib.h"
9 #endif
10 
11 EXTERN_C_BEGIN
12 #if defined(PETSC_USE_COMPLEX)
13 #include "superlu_zdefs.h"
14 #else
15 #include "superlu_ddefs.h"
16 #endif
17 EXTERN_C_END
18 
19 typedef enum { GLOBAL,DISTRIBUTED
20 } SuperLU_MatInputMode;
21 
22 typedef struct {
23   int_t                   nprow,npcol,*row,*col;
24   gridinfo_t              grid;
25   superlu_options_t       options;
26   SuperMatrix             A_sup;
27   ScalePermstruct_t       ScalePermstruct;
28   LUstruct_t              LUstruct;
29   int                     StatPrint;
30   int                     MatInputMode;
31   SOLVEstruct_t           SOLVEstruct;
32   MatStructure            flg;
33   MPI_Comm                comm_superlu;
34 #if defined(PETSC_USE_COMPLEX)
35   doublecomplex           *val;
36 #else
37   double                  *val;
38 #endif
39 
40   /* A few function pointers for inheritance */
41   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
42   PetscErrorCode (*MatView)(Mat,PetscViewer);
43   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
44   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
45   PetscErrorCode (*MatDestroy)(Mat);
46 
47   /* Flag to clean up (non-global) SuperLU objects during Destroy */
48   PetscTruth CleanUpSuperLU_Dist;
49 } Mat_SuperLU_DIST;
50 
51 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
52 
53 EXTERN_C_BEGIN
54 #undef __FUNCT__
55 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
56 PetscErrorCode MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
57 {
58   PetscErrorCode   ierr;
59   Mat              B=*newmat;
60   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
61 
62   PetscFunctionBegin;
63   if (reuse == MAT_INITIAL_MATRIX) {
64     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
65   }
66   /* Reset the original function pointers */
67   B->ops->duplicate        = lu->MatDuplicate;
68   B->ops->view             = lu->MatView;
69   B->ops->assemblyend      = lu->MatAssemblyEnd;
70   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
71   B->ops->destroy          = lu->MatDestroy;
72 
73   ierr = PetscFree(lu);CHKERRQ(ierr);
74 
75   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
76   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
79 
80   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
81   *newmat = B;
82   PetscFunctionReturn(0);
83 }
84 EXTERN_C_END
85 
86 #undef __FUNCT__
87 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
88 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
89 {
90   PetscErrorCode   ierr;
91   PetscMPIInt      size;
92   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
93 
94   PetscFunctionBegin;
95   if (lu->CleanUpSuperLU_Dist) {
96     /* Deallocate SuperLU_DIST storage */
97     if (lu->MatInputMode == GLOBAL) {
98       Destroy_CompCol_Matrix_dist(&lu->A_sup);
99     } else {
100       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
101       if ( lu->options.SolveInitialized ) {
102 #if defined(PETSC_USE_COMPLEX)
103         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
104 #else
105         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #endif
107       }
108     }
109     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
110     ScalePermstructFree(&lu->ScalePermstruct);
111     LUstructFree(&lu->LUstruct);
112 
113     /* Release the SuperLU_DIST process grid. */
114     superlu_gridexit(&lu->grid);
115 
116     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
117   }
118 
119   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
120   if (size == 1) {
121     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
122   } else {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   }
125   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
126 
127   PetscFunctionReturn(0);
128 }
129 
130 #undef __FUNCT__
131 #define __FUNCT__ "MatSolve_SuperLU_DIST"
132 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
133 {
134   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
135   PetscErrorCode   ierr;
136   PetscMPIInt      size;
137   PetscInt         m=A->M, N=A->N;
138   SuperLUStat_t    stat;
139   double           berr[1];
140   PetscScalar      *bptr;
141   PetscInt         info, nrhs=1;
142   Vec              x_seq;
143   IS               iden;
144   VecScatter       scat;
145 
146   PetscFunctionBegin;
147   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
148   if (size > 1) {
149     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
150       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
151       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
152       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
153       ierr = ISDestroy(iden);CHKERRQ(ierr);
154 
155       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
156       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
157       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
158     } else { /* distributed mat input */
159       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
160       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
161     }
162   } else { /* size == 1 */
163     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
164     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
165   }
166 
167   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
168 
169   PStatInit(&stat);        /* Initialize the statistics variables. */
170 
171   if (lu->MatInputMode == GLOBAL) {
172 #if defined(PETSC_USE_COMPLEX)
173     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
174                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
175 #else
176     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
177                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
178 #endif
179   } else { /* distributed mat input */
180 #if defined(PETSC_USE_COMPLEX)
181     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
182 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
183     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
184 #else
185     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
186 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
187     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
188 #endif
189   }
190   if (lu->StatPrint) {
191      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
192   }
193   PStatFree(&stat);
194 
195   if (size > 1) {
196     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
197       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
198       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
199       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
200       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
201       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
202     } else {
203       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
204     }
205   } else {
206     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
207   }
208   PetscFunctionReturn(0);
209 }
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
213 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
214 {
215   Mat              *tseq,A_seq = PETSC_NULL;
216   Mat_SeqAIJ       *aa,*bb;
217   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
218   PetscErrorCode   ierr;
219   PetscInt         M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
220                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
221   PetscMPIInt      size,rank;
222   SuperLUStat_t    stat;
223   double           *berr=0;
224   IS               isrow;
225   PetscLogDouble   time0,time,time_min,time_max;
226 #if defined(PETSC_USE_COMPLEX)
227   doublecomplex    *av, *bv;
228 #else
229   double           *av, *bv;
230 #endif
231 
232   PetscFunctionBegin;
233   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
234   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
235 
236   if (lu->StatPrint) { /* collect time for mat conversion */
237     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
238     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
239   }
240 
241   if (lu->MatInputMode == GLOBAL) { /* global mat input */
242     if (size > 1) { /* convert mpi A to seq mat A */
243       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
244       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
245       ierr = ISDestroy(isrow);CHKERRQ(ierr);
246 
247       A_seq = *tseq;
248       ierr = PetscFree(tseq);CHKERRQ(ierr);
249       aa =  (Mat_SeqAIJ*)A_seq->data;
250     } else {
251       aa =  (Mat_SeqAIJ*)A->data;
252     }
253 
254     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
255     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
256 #if defined(PETSC_USE_COMPLEX)
257       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
258 #else
259       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
260 #endif
261     } else { /* successive numeric factorization, sparsity pattern is reused. */
262       Destroy_CompCol_Matrix_dist(&lu->A_sup);
263       Destroy_LU(N, &lu->grid, &lu->LUstruct);
264       lu->options.Fact = SamePattern;
265     }
266 #if defined(PETSC_USE_COMPLEX)
267     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
268 #else
269     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
270 #endif
271 
272     /* Create compressed column matrix A_sup. */
273 #if defined(PETSC_USE_COMPLEX)
274     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
275 #else
276     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
277 #endif
278   } else { /* distributed mat input */
279     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
280     aa=(Mat_SeqAIJ*)(mat->A)->data;
281     bb=(Mat_SeqAIJ*)(mat->B)->data;
282     ai=aa->i; aj=aa->j;
283     bi=bb->i; bj=bb->j;
284 #if defined(PETSC_USE_COMPLEX)
285     av=(doublecomplex*)aa->a;
286     bv=(doublecomplex*)bb->a;
287 #else
288     av=aa->a;
289     bv=bb->a;
290 #endif
291     rstart = mat->rstart;
292     nz     = aa->nz + bb->nz;
293     garray = mat->garray;
294     rstart = mat->rstart;
295 
296     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
297 #if defined(PETSC_USE_COMPLEX)
298       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
299 #else
300       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #endif
302     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
303       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
304       Destroy_LU(N, &lu->grid, &lu->LUstruct);
305       lu->options.Fact = SamePattern;
306     }
307     nz = 0; irow = mat->rstart;
308     for ( i=0; i<m; i++ ) {
309       lu->row[i] = nz;
310       countA = ai[i+1] - ai[i];
311       countB = bi[i+1] - bi[i];
312       ajj = aj + ai[i];  /* ptr to the beginning of this row */
313       bjj = bj + bi[i];
314 
315       /* B part, smaller col index */
316       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
317       jB = 0;
318       for (j=0; j<countB; j++){
319         jcol = garray[bjj[j]];
320         if (jcol > colA_start) {
321           jB = j;
322           break;
323         }
324         lu->col[nz] = jcol;
325         lu->val[nz++] = *bv++;
326         if (j==countB-1) jB = countB;
327       }
328 
329       /* A part */
330       for (j=0; j<countA; j++){
331         lu->col[nz] = mat->rstart + ajj[j];
332         lu->val[nz++] = *av++;
333       }
334 
335       /* B part, larger col index */
336       for (j=jB; j<countB; j++){
337         lu->col[nz] = garray[bjj[j]];
338         lu->val[nz++] = *bv++;
339       }
340     }
341     lu->row[m] = nz;
342 #if defined(PETSC_USE_COMPLEX)
343     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
344 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
345 #else
346     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
347 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
348 #endif
349   }
350   if (lu->StatPrint) {
351     ierr = PetscGetTime(&time);CHKERRQ(ierr);
352     time0 = time - time0;
353   }
354 
355   /* Factor the matrix. */
356   PStatInit(&stat);   /* Initialize the statistics variables. */
357 
358   if (lu->MatInputMode == GLOBAL) { /* global mat input */
359 #if defined(PETSC_USE_COMPLEX)
360     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
361                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
362 #else
363     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
364                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
365 #endif
366   } else { /* distributed mat input */
367 #if defined(PETSC_USE_COMPLEX)
368     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
369 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
370     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
371 #else
372     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
373 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
374     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
375 #endif
376   }
377 
378   if (lu->MatInputMode == GLOBAL && size > 1){
379     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
380   }
381 
382   if (lu->StatPrint) {
383     if (size > 1){
384       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
385       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
386       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
387       time = time/size; /* average time */
388       if (!rank)
389         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
390                               %g / %g / %g\n",time_max,time_min,time);
391     } else {
392       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
393                               %g\n",time0);
394     }
395 
396     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
397   }
398   PStatFree(&stat);
399   (*F)->assembled = PETSC_TRUE;
400   lu->flg         = SAME_NONZERO_PATTERN;
401   PetscFunctionReturn(0);
402 }
403 
404 /* Note the Petsc r and c permutations are ignored */
405 #undef __FUNCT__
406 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
407 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
408 {
409   Mat               B;
410   Mat_SuperLU_DIST  *lu;
411   PetscErrorCode    ierr;
412   PetscInt          M=A->M,N=A->N,indx;
413   PetscMPIInt       size;
414   superlu_options_t options;
415   PetscTruth        flg;
416   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
417   const char        *prtype[] = {"LargeDiag","NATURAL"};
418 
419   PetscFunctionBegin;
420   /* Create the factorization matrix */
421   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
422   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
423   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
424   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
425 
426   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
427   B->ops->solve            = MatSolve_SuperLU_DIST;
428   B->factor                = FACTOR_LU;
429 
430   lu = (Mat_SuperLU_DIST*)(B->spptr);
431 
432   /* Set the input options */
433   set_default_options_dist(&options);
434   lu->MatInputMode = GLOBAL;
435   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
436 
437   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
438   lu->nprow = size/2;               /* Default process rows.      */
439   if (!lu->nprow) lu->nprow = 1;
440   lu->npcol = size/lu->nprow;           /* Default process columns.   */
441 
442   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
443 
444     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
445     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
446     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
447 
448     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
449     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
450 
451     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
452     if (!flg) {
453       options.Equil = NO;
454     }
455 
456     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
457     if (flg) {
458       switch (indx) {
459       case 0:
460         options.RowPerm = LargeDiag;
461         break;
462       case 1:
463         options.RowPerm = NOROWPERM;
464         break;
465       }
466     }
467 
468     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
469     if (flg) {
470       switch (indx) {
471       case 0:
472         options.ColPerm = MMD_AT_PLUS_A;
473         break;
474       case 1:
475         options.ColPerm = NATURAL;
476         break;
477       case 2:
478         options.ColPerm = MMD_ATA;
479         break;
480       case 3:
481         options.ColPerm = COLAMD;
482         break;
483       }
484     }
485 
486     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
487     if (!flg) {
488       options.ReplaceTinyPivot = NO;
489     }
490 
491     options.IterRefine = NOREFINE;
492     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
493     if (flg) {
494       options.IterRefine = DOUBLE;
495     }
496 
497     if (PetscLogPrintInfo) {
498       lu->StatPrint = (PetscInt)PETSC_TRUE;
499     } else {
500       lu->StatPrint = (PetscInt)PETSC_FALSE;
501     }
502     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
503                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
504   PetscOptionsEnd();
505 
506   /* Initialize the SuperLU process grid. */
507   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
508 
509   /* Initialize ScalePermstruct and LUstruct. */
510   ScalePermstructInit(M, N, &lu->ScalePermstruct);
511   LUstructInit(M, N, &lu->LUstruct);
512 
513   lu->options            = options;
514   lu->flg                = DIFFERENT_NONZERO_PATTERN;
515   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
516   *F = B;
517   PetscFunctionReturn(0);
518 }
519 
520 #undef __FUNCT__
521 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
522 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
523   PetscErrorCode   ierr;
524   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
525 
526   PetscFunctionBegin;
527   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
528   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
529   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
530   PetscFunctionReturn(0);
531 }
532 
533 #undef __FUNCT__
534 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
535 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
536 {
537   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
538   superlu_options_t options;
539   PetscErrorCode    ierr;
540 
541   PetscFunctionBegin;
542   /* check if matrix is superlu_dist type */
543   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
544 
545   options = lu->options;
546   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
547   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",(options.Equil != NO) ? "true": "false");CHKERRQ(ierr);
548   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
549   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",(options.ReplaceTinyPivot != NO) ? "true": "false");CHKERRQ(ierr);
550   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",(options.IterRefine == DOUBLE) ? "true": "false");CHKERRQ(ierr);
551   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
552   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
553   if (options.ColPerm == NATURAL) {
554     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
555   } else if (options.ColPerm == MMD_AT_PLUS_A) {
556     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
557   } else if (options.ColPerm == MMD_ATA) {
558     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
559   } else if (options.ColPerm == COLAMD) {
560     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation COLAMD\n");CHKERRQ(ierr);
561   } else {
562     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
563   }
564   PetscFunctionReturn(0);
565 }
566 
567 #undef __FUNCT__
568 #define __FUNCT__ "MatView_SuperLU_DIST"
569 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
570 {
571   PetscErrorCode    ierr;
572   PetscTruth        iascii;
573   PetscViewerFormat format;
574   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
575 
576   PetscFunctionBegin;
577   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
578 
579   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
580   if (iascii) {
581     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
582     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
583       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
584     }
585   }
586   PetscFunctionReturn(0);
587 }
588 
589 
590 EXTERN_C_BEGIN
591 #undef __FUNCT__
592 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
593 PetscErrorCode MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
594 {
595   /* This routine is only called to convert to MATSUPERLU_DIST */
596   /* from MATSEQAIJ if A has a single process communicator */
597   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
598   PetscErrorCode   ierr;
599   PetscMPIInt      size;
600   MPI_Comm         comm;
601   Mat              B=*newmat;
602   Mat_SuperLU_DIST *lu;
603 
604   PetscFunctionBegin;
605   if (reuse == MAT_INITIAL_MATRIX) {
606     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
607   }
608 
609   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
610   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
611 
612   lu->MatDuplicate         = A->ops->duplicate;
613   lu->MatView              = A->ops->view;
614   lu->MatAssemblyEnd       = A->ops->assemblyend;
615   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
616   lu->MatDestroy           = A->ops->destroy;
617   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
618 
619   B->spptr                 = (void*)lu;
620   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
621   B->ops->view             = MatView_SuperLU_DIST;
622   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
623   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
624   B->ops->destroy          = MatDestroy_SuperLU_DIST;
625   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
626   if (size == 1) {
627     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
628                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
629     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
630                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
631   } else {
632     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
633                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
634     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
635                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
636   }
637   ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr);
638   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
639   *newmat = B;
640   PetscFunctionReturn(0);
641 }
642 EXTERN_C_END
643 
644 #undef __FUNCT__
645 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
646 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
647   PetscErrorCode   ierr;
648   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
649 
650   PetscFunctionBegin;
651   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
652   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
653   PetscFunctionReturn(0);
654 }
655 
656 /*MC
657   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
658   via the external package SuperLU_DIST.
659 
660   If SuperLU_DIST is installed (see the manual for
661   instructions on how to declare the existence of external packages),
662   a matrix type can be constructed which invokes SuperLU_DIST solvers.
663   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
664 
665   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
666   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
667   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
668   for communicators controlling multiple processes.  It is recommended that you call both of
669   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
670   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
671   without data copy.
672 
673   Options Database Keys:
674 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
675 . -mat_superlu_dist_r <n> - number of rows in processor partition
676 . -mat_superlu_dist_c <n> - number of columns in processor partition
677 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
678 . -mat_superlu_dist_equil - equilibrate the matrix
679 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
680 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
681 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
682 . -mat_superlu_dist_iterrefine - use iterative refinement
683 - -mat_superlu_dist_statprint - print factorization information
684 
685    Level: beginner
686 
687 .seealso: PCLU
688 M*/
689 
690 EXTERN_C_BEGIN
691 #undef __FUNCT__
692 #define __FUNCT__ "MatCreate_SuperLU_DIST"
693 PetscErrorCode MatCreate_SuperLU_DIST(Mat A)
694 {
695   PetscErrorCode ierr;
696   PetscMPIInt    size;
697   Mat            A_diag;
698 
699   PetscFunctionBegin;
700   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
701   /*   and SuperLU_DIST types */
702   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
703   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
704   if (size == 1) {
705     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
706   } else {
707     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
708     A_diag = ((Mat_MPIAIJ *)A->data)->A;
709     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
710   }
711   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
712   PetscFunctionReturn(0);
713 }
714 EXTERN_C_END
715 
716