xref: /petsc/src/mat/impls/aij/mpi/superlu_dist/superlu_dist.c (revision ed1caa07d30b4bf52fa72dab33adf574bdef2af0)
1 #define PETSCMAT_DLL
2 
3 /*
4         Provides an interface to the SuperLU_DIST_2.0 sparse solver
5 */
6 
7 #include "src/mat/impls/aij/seq/aij.h"
8 #include "src/mat/impls/aij/mpi/mpiaij.h"
9 #if defined(PETSC_HAVE_STDLIB_H) /* This is to get arround weird problem with SuperLU on cray */
10 #include "stdlib.h"
11 #endif
12 
13 EXTERN_C_BEGIN
14 #if defined(PETSC_USE_COMPLEX)
15 #include "superlu_zdefs.h"
16 #else
17 #include "superlu_ddefs.h"
18 #endif
19 EXTERN_C_END
20 
21 typedef enum { GLOBAL,DISTRIBUTED
22 } SuperLU_MatInputMode;
23 
24 typedef struct {
25   int_t                   nprow,npcol,*row,*col;
26   gridinfo_t              grid;
27   superlu_options_t       options;
28   SuperMatrix             A_sup;
29   ScalePermstruct_t       ScalePermstruct;
30   LUstruct_t              LUstruct;
31   int                     StatPrint;
32   int                     MatInputMode;
33   SOLVEstruct_t           SOLVEstruct;
34   MatStructure            flg;
35   MPI_Comm                comm_superlu;
36 #if defined(PETSC_USE_COMPLEX)
37   doublecomplex           *val;
38 #else
39   double                  *val;
40 #endif
41 
42   /* A few function pointers for inheritance */
43   PetscErrorCode (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
44   PetscErrorCode (*MatView)(Mat,PetscViewer);
45   PetscErrorCode (*MatAssemblyEnd)(Mat,MatAssemblyType);
46   PetscErrorCode (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
47   PetscErrorCode (*MatDestroy)(Mat);
48 
49   /* Flag to clean up (non-global) SuperLU objects during Destroy */
50   PetscTruth CleanUpSuperLU_Dist;
51 } Mat_SuperLU_DIST;
52 
53 EXTERN PetscErrorCode MatDuplicate_SuperLU_DIST(Mat,MatDuplicateOption,Mat*);
54 
55 EXTERN_C_BEGIN
56 #undef __FUNCT__
57 #define __FUNCT__ "MatConvert_SuperLU_DIST_Base"
58 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SuperLU_DIST_Base(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
59 {
60   PetscErrorCode   ierr;
61   Mat              B=*newmat;
62   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
63 
64   PetscFunctionBegin;
65   if (reuse == MAT_INITIAL_MATRIX) {
66     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
67   }
68   /* Reset the original function pointers */
69   B->ops->duplicate        = lu->MatDuplicate;
70   B->ops->view             = lu->MatView;
71   B->ops->assemblyend      = lu->MatAssemblyEnd;
72   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
73   B->ops->destroy          = lu->MatDestroy;
74 
75   ierr = PetscFree(lu);CHKERRQ(ierr);
76 
77   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
78   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
79   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C","",PETSC_NULL);CHKERRQ(ierr);
80   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C","",PETSC_NULL);CHKERRQ(ierr);
81 
82   ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr);
83   *newmat = B;
84   PetscFunctionReturn(0);
85 }
86 EXTERN_C_END
87 
88 #undef __FUNCT__
89 #define __FUNCT__ "MatDestroy_SuperLU_DIST"
90 PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
91 {
92   PetscErrorCode   ierr;
93   PetscMPIInt      size;
94   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
95 
96   PetscFunctionBegin;
97   if (lu->CleanUpSuperLU_Dist) {
98     /* Deallocate SuperLU_DIST storage */
99     if (lu->MatInputMode == GLOBAL) {
100       Destroy_CompCol_Matrix_dist(&lu->A_sup);
101     } else {
102       Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);
103       if ( lu->options.SolveInitialized ) {
104 #if defined(PETSC_USE_COMPLEX)
105         zSolveFinalize(&lu->options, &lu->SOLVEstruct);
106 #else
107         dSolveFinalize(&lu->options, &lu->SOLVEstruct);
108 #endif
109       }
110     }
111     Destroy_LU(A->N, &lu->grid, &lu->LUstruct);
112     ScalePermstructFree(&lu->ScalePermstruct);
113     LUstructFree(&lu->LUstruct);
114 
115     /* Release the SuperLU_DIST process grid. */
116     superlu_gridexit(&lu->grid);
117 
118     ierr = MPI_Comm_free(&(lu->comm_superlu));CHKERRQ(ierr);
119   }
120 
121   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
122   if (size == 1) {
123     ierr = MatConvert_SuperLU_DIST_Base(A,MATSEQAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
124   } else {
125     ierr = MatConvert_SuperLU_DIST_Base(A,MATMPIAIJ,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
126   }
127   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
128 
129   PetscFunctionReturn(0);
130 }
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "MatSolve_SuperLU_DIST"
134 PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x)
135 {
136   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->spptr;
137   PetscErrorCode   ierr;
138   PetscMPIInt      size;
139   PetscInt         m=A->M, N=A->N;
140   SuperLUStat_t    stat;
141   double           berr[1];
142   PetscScalar      *bptr;
143   PetscInt         info, nrhs=1;
144   Vec              x_seq;
145   IS               iden;
146   VecScatter       scat;
147 
148   PetscFunctionBegin;
149   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
150   if (size > 1) {
151     if (lu->MatInputMode == GLOBAL) { /* global mat input, convert b to x_seq */
152       ierr = VecCreateSeq(PETSC_COMM_SELF,N,&x_seq);CHKERRQ(ierr);
153       ierr = ISCreateStride(PETSC_COMM_SELF,N,0,1,&iden);CHKERRQ(ierr);
154       ierr = VecScatterCreate(b_mpi,iden,x_seq,iden,&scat);CHKERRQ(ierr);
155       ierr = ISDestroy(iden);CHKERRQ(ierr);
156 
157       ierr = VecScatterBegin(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
158       ierr = VecScatterEnd(b_mpi,x_seq,INSERT_VALUES,SCATTER_FORWARD,scat);CHKERRQ(ierr);
159       ierr = VecGetArray(x_seq,&bptr);CHKERRQ(ierr);
160     } else { /* distributed mat input */
161       ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
162       ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
163     }
164   } else { /* size == 1 */
165     ierr = VecCopy(b_mpi,x);CHKERRQ(ierr);
166     ierr = VecGetArray(x,&bptr);CHKERRQ(ierr);
167   }
168 
169   lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only.*/
170 
171   PStatInit(&stat);        /* Initialize the statistics variables. */
172 
173   if (lu->MatInputMode == GLOBAL) {
174 #if defined(PETSC_USE_COMPLEX)
175     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,(doublecomplex*)bptr, m, nrhs,
176                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
177 #else
178     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct,bptr, m, nrhs,
179                    &lu->grid, &lu->LUstruct, berr, &stat, &info);
180 #endif
181   } else { /* distributed mat input */
182 #if defined(PETSC_USE_COMPLEX)
183     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, (doublecomplex*)bptr, A->M, nrhs, &lu->grid,
184 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
185     if (info) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",info);
186 #else
187     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, bptr, A->M, nrhs, &lu->grid,
188 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info);
189     if (info) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",info);
190 #endif
191   }
192   if (lu->StatPrint) {
193      PStatPrint(&lu->options, &stat, &lu->grid);     /* Print the statistics. */
194   }
195   PStatFree(&stat);
196 
197   if (size > 1) {
198     if (lu->MatInputMode == GLOBAL){ /* convert seq x to mpi x */
199       ierr = VecRestoreArray(x_seq,&bptr);CHKERRQ(ierr);
200       ierr = VecScatterBegin(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
201       ierr = VecScatterEnd(x_seq,x,INSERT_VALUES,SCATTER_REVERSE,scat);CHKERRQ(ierr);
202       ierr = VecScatterDestroy(scat);CHKERRQ(ierr);
203       ierr = VecDestroy(x_seq);CHKERRQ(ierr);
204     } else {
205       ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
206     }
207   } else {
208     ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr);
209   }
210   PetscFunctionReturn(0);
211 }
212 
213 #undef __FUNCT__
214 #define __FUNCT__ "MatLUFactorNumeric_SuperLU_DIST"
215 PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat A,MatFactorInfo *info,Mat *F)
216 {
217   Mat              *tseq,A_seq = PETSC_NULL;
218   Mat_SeqAIJ       *aa,*bb;
219   Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)(*F)->spptr;
220   PetscErrorCode   ierr;
221   PetscInt         M=A->M,N=A->N,sinfo,i,*ai,*aj,*bi,*bj,nz,rstart,*garray,
222                    m=A->m, irow,colA_start,j,jcol,jB,countA,countB,*bjj,*ajj;
223   PetscMPIInt      size,rank;
224   SuperLUStat_t    stat;
225   double           *berr=0;
226   IS               isrow;
227   PetscLogDouble   time0,time,time_min,time_max;
228 #if defined(PETSC_USE_COMPLEX)
229   doublecomplex    *av, *bv;
230 #else
231   double           *av, *bv;
232 #endif
233 
234   PetscFunctionBegin;
235   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
236   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
237 
238   if (lu->StatPrint) { /* collect time for mat conversion */
239     ierr = MPI_Barrier(A->comm);CHKERRQ(ierr);
240     ierr = PetscGetTime(&time0);CHKERRQ(ierr);
241   }
242 
243   if (lu->MatInputMode == GLOBAL) { /* global mat input */
244     if (size > 1) { /* convert mpi A to seq mat A */
245       ierr = ISCreateStride(PETSC_COMM_SELF,M,0,1,&isrow);CHKERRQ(ierr);
246       ierr = MatGetSubMatrices(A,1,&isrow,&isrow,MAT_INITIAL_MATRIX,&tseq);CHKERRQ(ierr);
247       ierr = ISDestroy(isrow);CHKERRQ(ierr);
248 
249       A_seq = *tseq;
250       ierr = PetscFree(tseq);CHKERRQ(ierr);
251       aa =  (Mat_SeqAIJ*)A_seq->data;
252     } else {
253       aa =  (Mat_SeqAIJ*)A->data;
254     }
255 
256     /* Allocate storage, then convert Petsc NR matrix to SuperLU_DIST NC */
257     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
258 #if defined(PETSC_USE_COMPLEX)
259       zallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
260 #else
261       dallocateA_dist(N, aa->nz, &lu->val, &lu->col, &lu->row);
262 #endif
263     } else { /* successive numeric factorization, sparsity pattern is reused. */
264       Destroy_CompCol_Matrix_dist(&lu->A_sup);
265       Destroy_LU(N, &lu->grid, &lu->LUstruct);
266       lu->options.Fact = SamePattern;
267     }
268 #if defined(PETSC_USE_COMPLEX)
269     zCompRow_to_CompCol_dist(M,N,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,&lu->val,&lu->col, &lu->row);
270 #else
271     dCompRow_to_CompCol_dist(M,N,aa->nz,aa->a,aa->j,aa->i,&lu->val, &lu->col, &lu->row);
272 #endif
273 
274     /* Create compressed column matrix A_sup. */
275 #if defined(PETSC_USE_COMPLEX)
276     zCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_Z, SLU_GE);
277 #else
278     dCreate_CompCol_Matrix_dist(&lu->A_sup, M, N, aa->nz, lu->val, lu->col, lu->row, SLU_NC, SLU_D, SLU_GE);
279 #endif
280   } else { /* distributed mat input */
281     Mat_MPIAIJ *mat = (Mat_MPIAIJ*)A->data;
282     aa=(Mat_SeqAIJ*)(mat->A)->data;
283     bb=(Mat_SeqAIJ*)(mat->B)->data;
284     ai=aa->i; aj=aa->j;
285     bi=bb->i; bj=bb->j;
286 #if defined(PETSC_USE_COMPLEX)
287     av=(doublecomplex*)aa->a;
288     bv=(doublecomplex*)bb->a;
289 #else
290     av=aa->a;
291     bv=bb->a;
292 #endif
293     rstart = mat->rstart;
294     nz     = aa->nz + bb->nz;
295     garray = mat->garray;
296     rstart = mat->rstart;
297 
298     if (lu->flg == DIFFERENT_NONZERO_PATTERN) {/* first numeric factorization */
299 #if defined(PETSC_USE_COMPLEX)
300       zallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
301 #else
302       dallocateA_dist(m, nz, &lu->val, &lu->col, &lu->row);
303 #endif
304     } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
305       /* Destroy_CompRowLoc_Matrix_dist(&lu->A_sup);  */ /* crash! */
306       Destroy_LU(N, &lu->grid, &lu->LUstruct);
307       lu->options.Fact = SamePattern;
308     }
309     nz = 0; irow = mat->rstart;
310     for ( i=0; i<m; i++ ) {
311       lu->row[i] = nz;
312       countA = ai[i+1] - ai[i];
313       countB = bi[i+1] - bi[i];
314       ajj = aj + ai[i];  /* ptr to the beginning of this row */
315       bjj = bj + bi[i];
316 
317       /* B part, smaller col index */
318       colA_start = mat->rstart + ajj[0]; /* the smallest global col index of A */
319       jB = 0;
320       for (j=0; j<countB; j++){
321         jcol = garray[bjj[j]];
322         if (jcol > colA_start) {
323           jB = j;
324           break;
325         }
326         lu->col[nz] = jcol;
327         lu->val[nz++] = *bv++;
328         if (j==countB-1) jB = countB;
329       }
330 
331       /* A part */
332       for (j=0; j<countA; j++){
333         lu->col[nz] = mat->rstart + ajj[j];
334         lu->val[nz++] = *av++;
335       }
336 
337       /* B part, larger col index */
338       for (j=jB; j<countB; j++){
339         lu->col[nz] = garray[bjj[j]];
340         lu->val[nz++] = *bv++;
341       }
342     }
343     lu->row[m] = nz;
344 #if defined(PETSC_USE_COMPLEX)
345     zCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
346 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_Z, SLU_GE);
347 #else
348     dCreate_CompRowLoc_Matrix_dist(&lu->A_sup, M, N, nz, m, rstart,
349 				   lu->val, lu->col, lu->row, SLU_NR_loc, SLU_D, SLU_GE);
350 #endif
351   }
352   if (lu->StatPrint) {
353     ierr = PetscGetTime(&time);CHKERRQ(ierr);
354     time0 = time - time0;
355   }
356 
357   /* Factor the matrix. */
358   PStatInit(&stat);   /* Initialize the statistics variables. */
359 
360   if (lu->MatInputMode == GLOBAL) { /* global mat input */
361 #if defined(PETSC_USE_COMPLEX)
362     pzgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
363                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
364 #else
365     pdgssvx_ABglobal(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0,
366                    &lu->grid, &lu->LUstruct, berr, &stat, &sinfo);
367 #endif
368   } else { /* distributed mat input */
369 #if defined(PETSC_USE_COMPLEX)
370     pzgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
371 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
372     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pzgssvx fails, info: %d\n",sinfo);
373 #else
374     pdgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, M, 0, &lu->grid,
375 	    &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo);
376     if (sinfo) SETERRQ1(PETSC_ERR_LIB,"pdgssvx fails, info: %d\n",sinfo);
377 #endif
378   }
379 
380   if (lu->MatInputMode == GLOBAL && size > 1){
381     ierr = MatDestroy(A_seq);CHKERRQ(ierr);
382   }
383 
384   if (lu->StatPrint) {
385     if (size > 1){
386       ierr = MPI_Reduce(&time0,&time_max,1,MPI_DOUBLE,MPI_MAX,0,A->comm);
387       ierr = MPI_Reduce(&time0,&time_min,1,MPI_DOUBLE,MPI_MIN,0,A->comm);
388       ierr = MPI_Reduce(&time0,&time,1,MPI_DOUBLE,MPI_SUM,0,A->comm);
389       time = time/size; /* average time */
390       if (!rank)
391         ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time (max/min/avg): \n \
392                               %g / %g / %g\n",time_max,time_min,time);
393     } else {
394       ierr = PetscPrintf(PETSC_COMM_SELF, "        Mat conversion(PETSc->SuperLU_DIST) time: \n \
395                               %g\n",time0);
396     }
397 
398     PStatPrint(&lu->options, &stat, &lu->grid);  /* Print the statistics. */
399   }
400   PStatFree(&stat);
401   (*F)->assembled = PETSC_TRUE;
402   lu->flg         = SAME_NONZERO_PATTERN;
403   PetscFunctionReturn(0);
404 }
405 
406 /* Note the Petsc r and c permutations are ignored */
407 #undef __FUNCT__
408 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU_DIST"
409 PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
410 {
411   Mat               B;
412   Mat_SuperLU_DIST  *lu;
413   PetscErrorCode    ierr;
414   PetscInt          M=A->M,N=A->N,indx;
415   PetscMPIInt       size;
416   superlu_options_t options;
417   PetscTruth        flg;
418   const char        *ptype[] = {"MMD_AT_PLUS_A","NATURAL","MMD_ATA","COLAMD"};
419   const char        *prtype[] = {"LargeDiag","NATURAL"};
420 
421   PetscFunctionBegin;
422   /* Create the factorization matrix */
423   ierr = MatCreate(A->comm,A->m,A->n,M,N,&B);CHKERRQ(ierr);
424   ierr = MatSetType(B,A->type_name);CHKERRQ(ierr);
425   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);
426   ierr = MatMPIAIJSetPreallocation(B,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
427 
428   B->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU_DIST;
429   B->ops->solve            = MatSolve_SuperLU_DIST;
430   B->factor                = FACTOR_LU;
431 
432   lu = (Mat_SuperLU_DIST*)(B->spptr);
433 
434   /* Set the input options */
435   set_default_options_dist(&options);
436   lu->MatInputMode = GLOBAL;
437   ierr = MPI_Comm_dup(A->comm,&(lu->comm_superlu));CHKERRQ(ierr);
438 
439   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
440   lu->nprow = size/2;               /* Default process rows.      */
441   if (!lu->nprow) lu->nprow = 1;
442   lu->npcol = size/lu->nprow;           /* Default process columns.   */
443 
444   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr);
445 
446     ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,&lu->nprow,PETSC_NULL);CHKERRQ(ierr);
447     ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,&lu->npcol,PETSC_NULL);CHKERRQ(ierr);
448     if (size != lu->nprow * lu->npcol) SETERRQ(PETSC_ERR_ARG_SIZ,"Number of processes should be equal to nprow*npcol");
449 
450     ierr = PetscOptionsInt("-mat_superlu_dist_matinput","Matrix input mode (0: GLOBAL; 1: DISTRIBUTED)","None",lu->MatInputMode,&lu->MatInputMode,PETSC_NULL);CHKERRQ(ierr);
451     if(lu->MatInputMode == DISTRIBUTED && size == 1) lu->MatInputMode = GLOBAL;
452 
453     ierr = PetscOptionsLogical("-mat_superlu_dist_equil","Equilibrate matrix","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
454     if (!flg) {
455       options.Equil = NO;
456     }
457 
458     ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",prtype,2,prtype[0],&indx,&flg);CHKERRQ(ierr);
459     if (flg) {
460       switch (indx) {
461       case 0:
462         options.RowPerm = LargeDiag;
463         break;
464       case 1:
465         options.RowPerm = NOROWPERM;
466         break;
467       }
468     }
469 
470     ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",ptype,4,ptype[0],&indx,&flg);CHKERRQ(ierr);
471     if (flg) {
472       switch (indx) {
473       case 0:
474         options.ColPerm = MMD_AT_PLUS_A;
475         break;
476       case 1:
477         options.ColPerm = NATURAL;
478         break;
479       case 2:
480         options.ColPerm = MMD_ATA;
481         break;
482       case 3:
483         options.ColPerm = COLAMD;
484         break;
485       }
486     }
487 
488     ierr = PetscOptionsLogical("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",PETSC_TRUE,&flg,0);CHKERRQ(ierr);
489     if (!flg) {
490       options.ReplaceTinyPivot = NO;
491     }
492 
493     options.IterRefine = NOREFINE;
494     ierr = PetscOptionsLogical("-mat_superlu_dist_iterrefine","Use iterative refinement","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
495     if (flg) {
496       options.IterRefine = DOUBLE;
497     }
498 
499     if (PetscLogPrintInfo) {
500       lu->StatPrint = (PetscInt)PETSC_TRUE;
501     } else {
502       lu->StatPrint = (PetscInt)PETSC_FALSE;
503     }
504     ierr = PetscOptionsLogical("-mat_superlu_dist_statprint","Print factorization information","None",
505                               (PetscTruth)lu->StatPrint,(PetscTruth*)&lu->StatPrint,0);CHKERRQ(ierr);
506   PetscOptionsEnd();
507 
508   /* Initialize the SuperLU process grid. */
509   superlu_gridinit(lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid);
510 
511   /* Initialize ScalePermstruct and LUstruct. */
512   ScalePermstructInit(M, N, &lu->ScalePermstruct);
513   LUstructInit(M, N, &lu->LUstruct);
514 
515   lu->options            = options;
516   lu->flg                = DIFFERENT_NONZERO_PATTERN;
517   lu->CleanUpSuperLU_Dist = PETSC_TRUE;
518   *F = B;
519   PetscFunctionReturn(0);
520 }
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "MatAssemblyEnd_SuperLU_DIST"
524 PetscErrorCode MatAssemblyEnd_SuperLU_DIST(Mat A,MatAssemblyType mode) {
525   PetscErrorCode   ierr;
526   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)(A->spptr);
527 
528   PetscFunctionBegin;
529   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
530   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
531   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
532   PetscFunctionReturn(0);
533 }
534 
535 #undef __FUNCT__
536 #define __FUNCT__ "MatFactorInfo_SuperLU_DIST"
537 PetscErrorCode MatFactorInfo_SuperLU_DIST(Mat A,PetscViewer viewer)
538 {
539   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)A->spptr;
540   superlu_options_t options;
541   PetscErrorCode    ierr;
542 
543   PetscFunctionBegin;
544   /* check if matrix is superlu_dist type */
545   if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0);
546 
547   options = lu->options;
548   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr);
549   ierr = PetscViewerASCIIPrintf(viewer,"  Equilibrate matrix %s \n",PetscTruths[options.Equil != NO]);CHKERRQ(ierr);
550   ierr = PetscViewerASCIIPrintf(viewer,"  Matrix input mode %d \n",lu->MatInputMode);CHKERRQ(ierr);
551   ierr = PetscViewerASCIIPrintf(viewer,"  Replace tiny pivots %s \n",PetscTruths[options.ReplaceTinyPivot] != NO);CHKERRQ(ierr);
552   ierr = PetscViewerASCIIPrintf(viewer,"  Use iterative refinement %s \n",PetscTruths[options.IterRefine == DOUBLE]);CHKERRQ(ierr);
553   ierr = PetscViewerASCIIPrintf(viewer,"  Processors in row %d col partition %d \n",lu->nprow,lu->npcol);CHKERRQ(ierr);
554   ierr = PetscViewerASCIIPrintf(viewer,"  Row permutation %s \n",(options.RowPerm == NOROWPERM) ? "NATURAL": "LargeDiag");CHKERRQ(ierr);
555   if (options.ColPerm == NATURAL) {
556     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation NATURAL\n");CHKERRQ(ierr);
557   } else if (options.ColPerm == MMD_AT_PLUS_A) {
558     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr);
559   } else if (options.ColPerm == MMD_ATA) {
560     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation MMD_ATA\n");CHKERRQ(ierr);
561   } else if (options.ColPerm == COLAMD) {
562     ierr = PetscViewerASCIIPrintf(viewer,"  Column permutation COLAMD\n");CHKERRQ(ierr);
563   } else {
564     SETERRQ(PETSC_ERR_ARG_WRONG,"Unknown column permutation");
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatView_SuperLU_DIST"
571 PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer)
572 {
573   PetscErrorCode    ierr;
574   PetscTruth        iascii;
575   PetscViewerFormat format;
576   Mat_SuperLU_DIST  *lu=(Mat_SuperLU_DIST*)(A->spptr);
577 
578   PetscFunctionBegin;
579   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
580 
581   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
582   if (iascii) {
583     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
584     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
585       ierr = MatFactorInfo_SuperLU_DIST(A,viewer);CHKERRQ(ierr);
586     }
587   }
588   PetscFunctionReturn(0);
589 }
590 
591 
592 EXTERN_C_BEGIN
593 #undef __FUNCT__
594 #define __FUNCT__ "MatConvert_Base_SuperLU_DIST"
595 PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_Base_SuperLU_DIST(Mat A,const MatType type,MatReuse reuse,Mat *newmat)
596 {
597   /* This routine is only called to convert to MATSUPERLU_DIST */
598   /* from MATSEQAIJ if A has a single process communicator */
599   /* or MATMPIAIJ otherwise, so we will ignore 'MatType type'. */
600   PetscErrorCode   ierr;
601   PetscMPIInt      size;
602   MPI_Comm         comm;
603   Mat              B=*newmat;
604   Mat_SuperLU_DIST *lu;
605 
606   PetscFunctionBegin;
607   if (reuse == MAT_INITIAL_MATRIX) {
608     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
609   }
610 
611   ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr);
612   ierr = PetscNew(Mat_SuperLU_DIST,&lu);CHKERRQ(ierr);
613 
614   lu->MatDuplicate         = A->ops->duplicate;
615   lu->MatView              = A->ops->view;
616   lu->MatAssemblyEnd       = A->ops->assemblyend;
617   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
618   lu->MatDestroy           = A->ops->destroy;
619   lu->CleanUpSuperLU_Dist  = PETSC_FALSE;
620 
621   B->spptr                 = (void*)lu;
622   B->ops->duplicate        = MatDuplicate_SuperLU_DIST;
623   B->ops->view             = MatView_SuperLU_DIST;
624   B->ops->assemblyend      = MatAssemblyEnd_SuperLU_DIST;
625   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
626   B->ops->destroy          = MatDestroy_SuperLU_DIST;
627   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);CHKERRQ(ierr);
628   if (size == 1) {
629     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_dist_C",
630                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
631     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_seqaij_C",
632                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
633   } else {
634     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpiaij_superlu_dist_C",
635                                              "MatConvert_Base_SuperLU_DIST",MatConvert_Base_SuperLU_DIST);CHKERRQ(ierr);
636     ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_dist_mpiaij_C",
637                                              "MatConvert_SuperLU_DIST_Base",MatConvert_SuperLU_DIST_Base);CHKERRQ(ierr);
638   }
639   ierr = PetscLogInfo((0,"MatConvert_Base_SuperLU_DIST:Using SuperLU_DIST for SeqAIJ LU factorization and solves.\n"));CHKERRQ(ierr);
640   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU_DIST);CHKERRQ(ierr);
641   *newmat = B;
642   PetscFunctionReturn(0);
643 }
644 EXTERN_C_END
645 
646 #undef __FUNCT__
647 #define __FUNCT__ "MatDuplicate_SuperLU_DIST"
648 PetscErrorCode MatDuplicate_SuperLU_DIST(Mat A, MatDuplicateOption op, Mat *M) {
649   PetscErrorCode   ierr;
650   Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST *)A->spptr;
651 
652   PetscFunctionBegin;
653   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
654   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU_DIST));CHKERRQ(ierr);
655   PetscFunctionReturn(0);
656 }
657 
658 /*MC
659   MATSUPERLU_DIST - MATSUPERLU_DIST = "superlu_dist" - A matrix type providing direct solvers (LU) for parallel matrices
660   via the external package SuperLU_DIST.
661 
662   If SuperLU_DIST is installed (see the manual for
663   instructions on how to declare the existence of external packages),
664   a matrix type can be constructed which invokes SuperLU_DIST solvers.
665   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU_DIST).
666 
667   This matrix inherits from MATSEQAIJ when constructed with a single process communicator,
668   and from MATMPIAIJ otherwise.  As a result, for single process communicators,
669   MatSeqAIJSetPreallocation is supported, and similarly MatMPIAIJSetPreallocation is supported
670   for communicators controlling multiple processes.  It is recommended that you call both of
671   the above preallocation routines for simplicity.  One can also call MatConvert for an inplace
672   conversion to or from the MATSEQAIJ or MATMPIAIJ type (depending on the communicator size)
673   without data copy.
674 
675   Options Database Keys:
676 + -mat_type superlu_dist - sets the matrix type to "superlu_dist" during a call to MatSetFromOptions()
677 . -mat_superlu_dist_r <n> - number of rows in processor partition
678 . -mat_superlu_dist_c <n> - number of columns in processor partition
679 . -mat_superlu_dist_matinput <0,1> - matrix input mode; 0=global, 1=distributed
680 . -mat_superlu_dist_equil - equilibrate the matrix
681 . -mat_superlu_dist_rowperm <LargeDiag,NATURAL> - row permutation
682 . -mat_superlu_dist_colperm <MMD_AT_PLUS_A,MMD_ATA,COLAMD,NATURAL> - column permutation
683 . -mat_superlu_dist_replacetinypivot - replace tiny pivots
684 . -mat_superlu_dist_iterrefine - use iterative refinement
685 - -mat_superlu_dist_statprint - print factorization information
686 
687    Level: beginner
688 
689 .seealso: PCLU
690 M*/
691 
692 EXTERN_C_BEGIN
693 #undef __FUNCT__
694 #define __FUNCT__ "MatCreate_SuperLU_DIST"
695 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SuperLU_DIST(Mat A)
696 {
697   PetscErrorCode ierr;
698   PetscMPIInt    size;
699   Mat            A_diag;
700 
701   PetscFunctionBegin;
702   /* Change type name before calling MatSetType to force proper construction of SeqAIJ or MPIAIJ */
703   /*   and SuperLU_DIST types */
704   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU_DIST);CHKERRQ(ierr);
705   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
706   if (size == 1) {
707     ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
708   } else {
709     ierr   = MatSetType(A,MATMPIAIJ);CHKERRQ(ierr);
710     A_diag = ((Mat_MPIAIJ *)A->data)->A;
711     ierr   = MatConvert_Base_SuperLU_DIST(A_diag,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A_diag);CHKERRQ(ierr);
712   }
713   ierr = MatConvert_Base_SuperLU_DIST(A,MATSUPERLU_DIST,MAT_REUSE_MATRIX,&A);CHKERRQ(ierr);
714   PetscFunctionReturn(0);
715 }
716 EXTERN_C_END
717 
718