xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision df48e8d96c19df54efdde2c76f874a16fcafd8fe)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   MatStructure            flg;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
79   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
80   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
81 
82   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
83   *newmat = B;
84   PetscFunctionReturn(0);
85 }
86 EXTERN_C_END
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
91 {
92   PetscErrorCode   ierr;
93   PetscMPIInt      size;
94   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
95 
96   PetscFunctionBegin;
97   if (lu->CleanUpSuperLU_Dist) {
98     /* Deallocate SuperLU_DIST storage */
99     if (lu->MatInputMode == GLOBAL) {
100       Destroy_CompCol_Matrix_dist(&lu->A_sup);
101     } else {
102       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
103       if ( lu->options.SolveInitialized ) {
104 #if defined(PETSC_USE_COMPLEX)
105         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #else
107         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
108 #endif
109       }
110     }
111     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
112     ScalePermstructFree(&lu->ScalePermstruct);
113     LUstructFree(&lu->LUstruct);
114 
115     /* Release the SuperLU_DIST process grid. */
116     superlu_gridexit(&lu->grid);
117 
118     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
119   }
120 
121   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
122   if (size == 1) {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   } else {
125     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
126   }
127   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
128 
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSolve_SuperLU_DIST"
134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
135 {
136   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
137   PetscErrorCode   ierr;
138   PetscMPIInt      size;
139   PetscInt         m=A->M, N=A->N;
140   SuperLUStat_t    stat;
141   double           berr[1];
142   PetscScalar      *bptr;
143   PetscInt         info, nrhs=1;
144   Vec              x_seq;
145   IS               iden;
146   VecScatter       scat;
147 
148   PetscFunctionBegin;
149   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
150   if (size > 1) {
151     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
152       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
153       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
154       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
155       ierr = ISDestroy(iden);CHKERRQ(ierr);
156 
157       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
158       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
159       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
160     } else { /* distributed mat input */
161       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
162       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
163     }
164   } else { /* size == 1 */
165     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
166     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
167   }
168 
169   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
170 
171   PStatInit(&stat);        /* Initialize the statistics variables. */
172 
173   if (lu->MatInputMode == GLOBAL) {
174 #if defined(PETSC_USE_COMPLEX)
175     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
176                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
177 #else
178     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
179                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
180 #endif
181   } else { /* distributed mat input */
182 #if defined(PETSC_USE_COMPLEX)
183     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
184 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
185     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
186 #else
187     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
188 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
189     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
190 #endif
191   }
192   if (lu->StatPrint) {
193      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
194   }
195   PStatFree(&stat);
196 
197   if (size > 1) {
198     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
199       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
200       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
201       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
202       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
203       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
204     } else {
205       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
206     }
207   } else {
208     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
209   }
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
215 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
216 {
217   Mat              *tseq,A_seq = PETSC_NULL;
218   Mat_SeqAIJ       *aa,*bb;
219   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
220   PetscErrorCode   ierr;
221   PetscInt         M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
222                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
223   PetscMPIInt      size,rank;
224   SuperLUStat_t    stat;
225   double           *berr=0;
226   IS               isrow;
227   PetscLogDouble   time0,time,time_min,time_max;
228 #if defined(PETSC_USE_COMPLEX)
229   doublecomplex    *av, *bv;
230 #else
231   double           *av, *bv;
232 #endif
233 
234   PetscFunctionBegin;
235   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
236   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
237 
238   if (lu->StatPrint) { /* collect time for mat conversion */
239     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
240     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
241   }
242 
243   if (lu->MatInputMode == GLOBAL) { /* global mat input */
244     if (size > 1) { /* convert mpi A to seq mat A */
245       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
246       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
247       ierr = ISDestroy(isrow);CHKERRQ(ierr);
248 
249       A_seq = *tseq;
250       ierr = PetscFree(tseq);CHKERRQ(ierr);
251       aa =  (Mat_SeqAIJ*)A_seq->data;
252     } else {
253       aa =  (Mat_SeqAIJ*)A->data;
254     }
255 
256     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
257     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
258 #if defined(PETSC_USE_COMPLEX)
259       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
260 #else
261       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
262 #endif
263     } else { /* successive numeric factorization, sparsity pattern is reused. */
264       Destroy_CompCol_Matrix_dist(&lu->A_sup);
265       Destroy_LU(N, &lu->grid, &lu->LUstruct);
266       lu->options.Fact = SamePattern;
267     }
268 #if defined(PETSC_USE_COMPLEX)
269     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
270 #else
271     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
272 #endif
273 
274     /* Create compressed column matrix A_sup. */
275 #if defined(PETSC_USE_COMPLEX)
276     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
277 #else
278     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
279 #endif
280   } else { /* distributed mat input */
281     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
282     aa=(Mat_SeqAIJ*)(mat->A)->data;
283     bb=(Mat_SeqAIJ*)(mat->B)->data;
284     ai=aa->i; aj=aa->j;
285     bi=bb->i; bj=bb->j;
286 #if defined(PETSC_USE_COMPLEX)
287     av=(doublecomplex*)aa->a;
288     bv=(doublecomplex*)bb->a;
289 #else
290     av=aa->a;
291     bv=bb->a;
292 #endif
293     rstart = mat->rstart;
294     nz     = aa->nz + bb->nz;
295     garray = mat->garray;
296     rstart = mat->rstart;
297 
298     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
299 #if defined(PETSC_USE_COMPLEX)
300       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #else
302       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
303 #endif
304     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
305       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
306       Destroy_LU(N, &lu->grid, &lu->LUstruct);
307       lu->options.Fact = SamePattern;
308     }
309     nz = 0; irow = mat->rstart;
310     for ( i=0; i<m; i++ ) {
311       lu->row[i] = nz;
312       countA = ai[i+1] - ai[i];
313       countB = bi[i+1] - bi[i];
314       ajj = aj + ai[i];  /* ptr to the beginning of this row */
315       bjj = bj + bi[i];
316 
317       /* B part, smaller col index */
318       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
319       jB = 0;
320       for (j=0; j<countB; j++){
321         jcol = garray[bjj[j]];
322         if (jcol > colA_start) {
323           jB = j;
324           break;
325         }
326         lu->col[nz] = jcol;
327         lu->val[nz++] = *bv++;
328         if (j==countB-1) jB = countB;
329       }
330 
331       /* A part */
332       for (j=0; j<countA; j++){
333         lu->col[nz] = mat->rstart + ajj[j];
334         lu->val[nz++] = *av++;
335       }
336 
337       /* B part, larger col index */
338       for (j=jB; j<countB; j++){
339         lu->col[nz] = garray[bjj[j]];
340         lu->val[nz++] = *bv++;
341       }
342     }
343     lu->row[m] = nz;
344 #if defined(PETSC_USE_COMPLEX)
345     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
346 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
347 #else
348     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
349 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
350 #endif
351   }
352   if (lu->StatPrint) {
353     ierr = PetscGetTime(&time);CHKERRQ(ierr);
354     time0 = time - time0;
355   }
356 
357   /* Factor the matrix. */
358   PStatInit(&stat);   /* Initialize the statistics variables. */
359 
360   if (lu->MatInputMode == GLOBAL) { /* global mat input */
361 #if defined(PETSC_USE_COMPLEX)
362     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
363                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
364 #else
365     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
366                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
367 #endif
368   } else { /* distributed mat input */
369 #if defined(PETSC_USE_COMPLEX)
370     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
371 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
372     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
373 #else
374     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
375 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
376     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
377 #endif
378   }
379 
380   if (lu->MatInputMode == GLOBAL && size > 1){
381     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
382   }
383 
384   if (lu->StatPrint) {
385     if (size > 1){
386       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
387       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
388       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
389       time = time/size; /* average time */
390       if (!rank)
391         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
392                               %g / %g / %g\n",time_max,time_min,time);
393     } else {
394       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
395                               %g\n",time0);
396     }
397 
398     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
399   }
400   PStatFree(&stat);
401   (*F)->assembled = PETSC_TRUE;
402   lu->flg         = SAME_NONZERO_PATTERN;
403   PetscFunctionReturn(0);
404 }
405 
406 /* Note the Petsc r and c permutations are ignored */
407 #undef __FUNCT__
408 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
409 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
410 {
411   Mat               B;
412   Mat_SuperLU_DIST  *lu;
413   PetscErrorCode    ierr;
414   PetscInt          M=A->M,N=A->N,indx;
415   PetscMPIInt       size;
416   superlu_options_t options;
417   PetscTruth        flg;
418   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
419   const char        *prtype[] = {"LargeDiag","NATURAL"};
420 
421   PetscFunctionBegin;
422   /* Create the factorization matrix */
423   ierr = MatCreate(A->comm,&B);CHKERRQ(ierr);
424   ierr = MatSetSizes(B,A->m,A->n,M,N);CHKERRQ(ierr);
425   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
426   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
427   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
428 
429   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
430   B->ops->solve            = MatSolve_SuperLU_DIST;
431   B->factor                = FACTOR_LU;
432 
433   lu = (Mat_SuperLU_DIST*)(B->spptr);
434 
435   /* Set the input options */
436   set_default_options_dist(&options);
437   lu->MatInputMode = GLOBAL;
438   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
439 
440   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
441   lu->nprow = size/2;               /* Default process rows.      */
442   if (!lu->nprow) lu->nprow = 1;
443   lu->npcol = size/lu->nprow;           /* Default process columns.   */
444 
445   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
446 
447     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
448     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
449     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
450 
451     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
452     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
453 
454     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
455     if (!flg) {
456       options.Equil = NO;
457     }
458 
459     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
460     if (flg) {
461       switch (indx) {
462       case 0:
463         options.RowPerm = LargeDiag;
464         break;
465       case 1:
466         options.RowPerm = NOROWPERM;
467         break;
468       }
469     }
470 
471     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
472     if (flg) {
473       switch (indx) {
474       case 0:
475         options.ColPerm = MMD_AT_PLUS_A;
476         break;
477       case 1:
478         options.ColPerm = NATURAL;
479         break;
480       case 2:
481         options.ColPerm = MMD_ATA;
482         break;
483       case 3:
484         options.ColPerm = COLAMD;
485         break;
486       }
487     }
488 
489     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
490     if (!flg) {
491       options.ReplaceTinyPivot = NO;
492     }
493 
494     options.IterRefine = NOREFINE;
495     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
496     if (flg) {
497       options.IterRefine = DOUBLE;
498     }
499 
500     if (PetscLogPrintInfo) {
501       lu->StatPrint = (PetscInt)PETSC_TRUE;
502     } else {
503       lu->StatPrint = (PetscInt)PETSC_FALSE;
504     }
505     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
506                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
507   PetscOptionsEnd();
508 
509   /* Initialize the SuperLU process grid. */
510   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
511 
512   /* Initialize ScalePermstruct and LUstruct. */
513   ScalePermstructInit(M, N, &lu->ScalePermstruct);
514   LUstructInit(M, N, &lu->LUstruct);
515 
516   lu->options            = options;
517   lu->flg                = DIFFERENT_NONZERO_PATTERN;
518   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
519   *F = B;
520   PetscFunctionReturn(0);
521 }
522 
523 #undef __FUNCT__
524 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
525 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
526   PetscErrorCode   ierr;
527   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
528 
529   PetscFunctionBegin;
530   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
531   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
532   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
533   PetscFunctionReturn(0);
534 }
535 
536 #undef __FUNCT__
537 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
538 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
539 {
540   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
541   superlu_options_t options;
542   PetscErrorCode    ierr;
543 
544   PetscFunctionBegin;
545   /* check if matrix is superlu_dist type */
546   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
547 
548   options = lu->options;
549   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
550   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
551   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
552   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot] != NO);CHKERRQ(ierr);
553   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
554   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
555   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
556   if (options.ColPerm == NATURAL) {
557     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
558   } else if (options.ColPerm == MMD_AT_PLUS_A) {
559     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
560   } else if (options.ColPerm == MMD_ATA) {
561     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
562   } else if (options.ColPerm == COLAMD) {
563     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation COLAMD\n");CHKERRQ(ierr);
564   } else {
565     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
566   }
567   PetscFunctionReturn(0);
568 }
569 
570 #undef __FUNCT__
571 #define __FUNCT__ "MatView_SuperLU_DIST"
572 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
573 {
574   PetscErrorCode    ierr;
575   PetscTruth        iascii;
576   PetscViewerFormat format;
577   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
578 
579   PetscFunctionBegin;
580   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
581 
582   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
583   if (iascii) {
584     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
585     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
586       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
587     }
588   }
589   PetscFunctionReturn(0);
590 }
591 
592 
593 EXTERN_C_BEGIN
594 #undef __FUNCT__
595 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
596 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
597 {
598   /* This routine is only called to convert to MATSUPERLU_DIST */
599   /* from MATSEQAIJ if A has a single process communicator */
600   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
601   PetscErrorCode   ierr;
602   PetscMPIInt      size;
603   MPI_Comm         comm;
604   Mat              B=*newmat;
605   Mat_SuperLU_DIST *lu;
606 
607   PetscFunctionBegin;
608   if (reuse == MAT_INITIAL_MATRIX) {
609     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
610   }
611 
612   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
613   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
614 
615   lu->MatDuplicate         = A->ops->duplicate;
616   lu->MatView              = A->ops->view;
617   lu->MatAssemblyEnd       = A->ops->assemblyend;
618   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
619   lu->MatDestroy           = A->ops->destroy;
620   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
621 
622   B->spptr                 = (void*)lu;
623   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
624   B->ops->view             = MatView_SuperLU_DIST;
625   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
626   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
627   B->ops->destroy          = MatDestroy_SuperLU_DIST;
628   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
629   if (size == 1) {
630     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
631                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
632     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
633                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
634   } else {
635     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
636                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
637     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
638                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
639   }
640   ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr);
641   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
642   *newmat = B;
643   PetscFunctionReturn(0);
644 }
645 EXTERN_C_END
646 
647 #undef __FUNCT__
648 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
649 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
650   PetscErrorCode   ierr;
651   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
652 
653   PetscFunctionBegin;
654   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
655   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
656   PetscFunctionReturn(0);
657 }
658 
659 /*MC
660   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
661   via the external package SuperLU_DIST.
662 
663   If SuperLU_DIST is installed (see the manual for
664   instructions on how to declare the existence of external packages),
665   a matrix type can be constructed which invokes SuperLU_DIST solvers.
666   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
667 
668   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
669   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
670   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
671   for communicators controlling multiple processes.  It is recommended that you call both of
672   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
673   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
674   without data copy.
675 
676   Options Database Keys:
677 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
678 . -mat_superlu_dist_r <n> - number of rows in processor partition
679 . -mat_superlu_dist_c <n> - number of columns in processor partition
680 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
681 . -mat_superlu_dist_equil - equilibrate the matrix
682 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
683 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
684 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
685 . -mat_superlu_dist_iterrefine - use iterative refinement
686 - -mat_superlu_dist_statprint - print factorization information
687 
688    Level: beginner
689 
690 .seealso: PCLU
691 M*/
692 
693 EXTERN_C_BEGIN
694 #undef __FUNCT__
695 #define __FUNCT__ "MatCreate_SuperLU_DIST"
696 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
697 {
698   PetscErrorCode ierr;
699   PetscMPIInt    size;
700   Mat            A_diag;
701 
702   PetscFunctionBegin;
703   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
704   /*   and SuperLU_DIST types */
705   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
706   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
707   if (size == 1) {
708     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
709   } else {
710     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
711     A_diag = ((Mat_MPIAIJ *)A->data)->A;
712     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
713   }
714   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
715   PetscFunctionReturn(0);
716 }
717 EXTERN_C_END
718 
719