xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 860c79edc3e8a2fb7b96a084ecdd3e2f48e9005d)
114b396bbSKris Buschelman 
25a46d3faSBarry Smith /*  --------------------------------------------------------------------
35a46d3faSBarry Smith 
45a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
5c2b89b5dSBarry Smith      the SuperLU sparse solver.
614b396bbSKris Buschelman */
714b396bbSKris Buschelman 
85a46d3faSBarry Smith /*
95a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
105a46d3faSBarry Smith */
11c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
1214b396bbSKris Buschelman 
135a46d3faSBarry Smith /*
145a46d3faSBarry Smith      SuperLU include files
155a46d3faSBarry Smith */
1614b396bbSKris Buschelman EXTERN_C_BEGIN
1714b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
183cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
193cb270beSHong Zhang #include <slu_cdefs.h>
203cb270beSHong Zhang #else
21c6db04a5SJed Brown #include <slu_zdefs.h>
223cb270beSHong Zhang #endif
233cb270beSHong Zhang #else
243cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
253cb270beSHong Zhang #include <slu_sdefs.h>
2614b396bbSKris Buschelman #else
27c6db04a5SJed Brown #include <slu_ddefs.h>
2814b396bbSKris Buschelman #endif
293cb270beSHong Zhang #endif
30c6db04a5SJed Brown #include <slu_util.h>
3114b396bbSKris Buschelman EXTERN_C_END
3214b396bbSKris Buschelman 
335a46d3faSBarry Smith /*
34245fece9SBarry Smith      This is the data that defines the SuperLU factored matrix type
355a46d3faSBarry Smith */
3614b396bbSKris Buschelman typedef struct {
3775af56d4SHong Zhang   SuperMatrix       A,L,U,B,X;
3875af56d4SHong Zhang   superlu_options_t options;
39da7a1d00SHong Zhang   PetscInt          *perm_c; /* column permutation vector */
40da7a1d00SHong Zhang   PetscInt          *perm_r; /* row permutations from partial pivoting */
41da7a1d00SHong Zhang   PetscInt          *etree;
42da7a1d00SHong Zhang   PetscReal         *R, *C;
4375af56d4SHong Zhang   char              equed[1];
44da7a1d00SHong Zhang   PetscInt          lwork;
4575af56d4SHong Zhang   void              *work;
46da7a1d00SHong Zhang   PetscReal         rpg, rcond;
4775af56d4SHong Zhang   mem_usage_t       mem_usage;
4875af56d4SHong Zhang   MatStructure      flg;
495d8b2efaSHong Zhang   SuperLUStat_t     stat;
50cae5a23dSHong Zhang   Mat               A_dup;
51cae5a23dSHong Zhang   PetscScalar       *rhs_dup;
52c147c726SHong Zhang   GlobalLU_t        Glu;
5314b396bbSKris Buschelman 
5414b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
55ace3abfcSBarry Smith   PetscBool CleanUpSuperLU;
56f0c56d0fSKris Buschelman } Mat_SuperLU;
5714b396bbSKris Buschelman 
585a46d3faSBarry Smith /*
595a46d3faSBarry Smith     Utility function
605a46d3faSBarry Smith */
61*860c79edSBarry Smith static PetscErrorCode MatView_Info_SuperLU(Mat A,PetscViewer viewer)
625a46d3faSBarry Smith {
63245fece9SBarry Smith   Mat_SuperLU       *lu= (Mat_SuperLU*)A->data;
645a46d3faSBarry Smith   PetscErrorCode    ierr;
655a46d3faSBarry Smith   superlu_options_t options;
665a46d3faSBarry Smith 
675a46d3faSBarry Smith   PetscFunctionBegin;
685a46d3faSBarry Smith   options = lu->options;
692205254eSKarl Rupp 
705a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
715a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
725a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
735a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
745a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
755a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
765a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
775a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
785a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
795a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
805a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
815a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
82d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_ILU) {
83cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
84cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
85cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
86cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
87cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
88cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
89cffbb591SHong Zhang   }
905a46d3faSBarry Smith   PetscFunctionReturn(0);
915a46d3faSBarry Smith }
925a46d3faSBarry Smith 
93245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
94245fece9SBarry Smith {
95245fece9SBarry Smith   Mat_SuperLU       *lu = (Mat_SuperLU*)A->data;
96245fece9SBarry Smith   const PetscScalar *barray;
97245fece9SBarry Smith   PetscScalar       *xarray;
98245fece9SBarry Smith   PetscErrorCode    ierr;
99245fece9SBarry Smith   PetscInt          info,i,n;
100245fece9SBarry Smith   PetscReal         ferr,berr;
101245fece9SBarry Smith   static PetscBool  cite = PETSC_FALSE;
102245fece9SBarry Smith 
103245fece9SBarry Smith   PetscFunctionBegin;
104245fece9SBarry Smith   if (lu->lwork == -1) PetscFunctionReturn(0);
105245fece9SBarry Smith   ierr = PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",&cite);CHKERRQ(ierr);
106245fece9SBarry Smith 
107245fece9SBarry Smith   ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
108245fece9SBarry Smith   lu->B.ncol = 1;   /* Set the number of right-hand side */
109245fece9SBarry Smith   if (lu->options.Equil && !lu->rhs_dup) {
110245fece9SBarry Smith     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
111245fece9SBarry Smith     ierr = PetscMalloc1(n,&lu->rhs_dup);CHKERRQ(ierr);
112245fece9SBarry Smith   }
113245fece9SBarry Smith   if (lu->options.Equil) {
114245fece9SBarry Smith     /* Copy b into rsh_dup */
115245fece9SBarry Smith     ierr   = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
116245fece9SBarry Smith     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
117245fece9SBarry Smith     ierr   = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
118245fece9SBarry Smith     barray = lu->rhs_dup;
119245fece9SBarry Smith   } else {
120245fece9SBarry Smith     ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
121245fece9SBarry Smith   }
122245fece9SBarry Smith   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
123245fece9SBarry Smith 
124245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
125245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
126245fece9SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
127245fece9SBarry Smith   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
128245fece9SBarry Smith #else
129245fece9SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
130245fece9SBarry Smith   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
131245fece9SBarry Smith #endif
132245fece9SBarry Smith #else
133245fece9SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (void*)barray;
134245fece9SBarry Smith   ((DNformat*)lu->X.Store)->nzval = xarray;
135245fece9SBarry Smith #endif
136245fece9SBarry Smith 
137245fece9SBarry Smith   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
138245fece9SBarry Smith   if (A->factortype == MAT_FACTOR_LU) {
139245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
140245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
141245fece9SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
142245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
143245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
144245fece9SBarry Smith #else
145245fece9SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
146245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
147245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
148245fece9SBarry Smith #endif
149245fece9SBarry Smith #else
150245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
151245fece9SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
152245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
153245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
154245fece9SBarry Smith #else
155245fece9SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
156245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
157245fece9SBarry Smith                                      &lu->Glu,&lu->mem_usage, &lu->stat, &info));
158245fece9SBarry Smith #endif
159245fece9SBarry Smith #endif
160245fece9SBarry Smith   } else if (A->factortype == MAT_FACTOR_ILU) {
161245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
162245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
163245fece9SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
164245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
165245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
166245fece9SBarry Smith #else
167245fece9SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
168245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
169245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
170245fece9SBarry Smith #endif
171245fece9SBarry Smith #else
172245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
173245fece9SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
174245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
175245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
176245fece9SBarry Smith #else
177245fece9SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
178245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
179245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
180245fece9SBarry Smith #endif
181245fece9SBarry Smith #endif
182245fece9SBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
183245fece9SBarry Smith   if (!lu->options.Equil) {
184245fece9SBarry Smith     ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
185245fece9SBarry Smith   }
186245fece9SBarry Smith   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
187245fece9SBarry Smith 
188245fece9SBarry Smith   if (!info || info == lu->A.ncol+1) {
189245fece9SBarry Smith     if (lu->options.IterRefine) {
190245fece9SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
191245fece9SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
192245fece9SBarry Smith       for (i = 0; i < 1; ++i) {
193245fece9SBarry Smith         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
194245fece9SBarry Smith       }
195245fece9SBarry Smith     }
196245fece9SBarry Smith   } else if (info > 0) {
197245fece9SBarry Smith     if (lu->lwork == -1) {
198245fece9SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
199245fece9SBarry Smith     } else {
200245fece9SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
201245fece9SBarry Smith     }
202245fece9SBarry Smith   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
203245fece9SBarry Smith 
204245fece9SBarry Smith   if (lu->options.PrintStat) {
205245fece9SBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
206245fece9SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
207245fece9SBarry Smith   }
208245fece9SBarry Smith   PetscFunctionReturn(0);
209245fece9SBarry Smith }
210245fece9SBarry Smith 
211245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
212245fece9SBarry Smith {
213245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)A->data;
214245fece9SBarry Smith   PetscErrorCode ierr;
215245fece9SBarry Smith 
216245fece9SBarry Smith   PetscFunctionBegin;
217603e8f96SBarry Smith   if (A->factorerrortype) {
218245fece9SBarry Smith     ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr);
219245fece9SBarry Smith     ierr = VecSetInf(x);CHKERRQ(ierr);
220245fece9SBarry Smith     PetscFunctionReturn(0);
221245fece9SBarry Smith   }
222245fece9SBarry Smith 
223245fece9SBarry Smith   lu->options.Trans = TRANS;
224245fece9SBarry Smith   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
225245fece9SBarry Smith   PetscFunctionReturn(0);
226245fece9SBarry Smith }
227245fece9SBarry Smith 
228245fece9SBarry Smith PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
229245fece9SBarry Smith {
230245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)A->data;
231245fece9SBarry Smith   PetscErrorCode ierr;
232245fece9SBarry Smith 
233245fece9SBarry Smith   PetscFunctionBegin;
234603e8f96SBarry Smith   if (A->factorerrortype) {
235245fece9SBarry Smith     ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr);
236245fece9SBarry Smith     ierr = VecSetInf(x);CHKERRQ(ierr);
237245fece9SBarry Smith     PetscFunctionReturn(0);
238245fece9SBarry Smith   }
239245fece9SBarry Smith 
240245fece9SBarry Smith   lu->options.Trans = NOTRANS;
241245fece9SBarry Smith   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
242245fece9SBarry Smith   PetscFunctionReturn(0);
243245fece9SBarry Smith }
244245fece9SBarry Smith 
245245fece9SBarry Smith static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
2465a46d3faSBarry Smith {
247245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)F->data;
248cae5a23dSHong Zhang   Mat_SeqAIJ     *aa;
2495a46d3faSBarry Smith   PetscErrorCode ierr;
2505a46d3faSBarry Smith   PetscInt       sinfo;
2515a46d3faSBarry Smith   PetscReal      ferr, berr;
2525a46d3faSBarry Smith   NCformat       *Ustore;
2535a46d3faSBarry Smith   SCformat       *Lstore;
2545a46d3faSBarry Smith 
2555a46d3faSBarry Smith   PetscFunctionBegin;
2565a46d3faSBarry Smith   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
2575a46d3faSBarry Smith     lu->options.Fact = SamePattern;
2585a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
2595a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
260cae5a23dSHong Zhang     if (lu->options.Equil) {
261cae5a23dSHong Zhang       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
262cae5a23dSHong Zhang     }
2635a46d3faSBarry Smith     if (lu->lwork >= 0) {
264d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
265d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
2665a46d3faSBarry Smith       lu->options.Fact = SamePattern;
2675a46d3faSBarry Smith     }
2685a46d3faSBarry Smith   }
2695a46d3faSBarry Smith 
2705a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
2715a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
2725a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
273cae5a23dSHong Zhang   if (lu->options.Equil) {
274cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
275cae5a23dSHong Zhang   } else {
276cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(A)->data;
277cae5a23dSHong Zhang   }
2785a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2793cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
280d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
2813cb270beSHong Zhang #else
282d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
2833cb270beSHong Zhang #endif
2843cb270beSHong Zhang #else
2853cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
286d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
2875a46d3faSBarry Smith #else
288d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
2895a46d3faSBarry Smith #endif
2903cb270beSHong Zhang #endif
2915a46d3faSBarry Smith 
2925a46d3faSBarry Smith   /* Numerical factorization */
2935a46d3faSBarry Smith   lu->B.ncol = 0;  /* Indicate not to solve the system */
294d5f3da31SBarry Smith   if (F->factortype == MAT_FACTOR_LU) {
2955a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2963cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
297d387c056SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
2983cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
2995db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3003cb270beSHong Zhang #else
301d387c056SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3025a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
3035db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3043cb270beSHong Zhang #endif
3053cb270beSHong Zhang #else
3063cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
307d387c056SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3083cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
3095db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3105a46d3faSBarry Smith #else
311d387c056SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3125a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
313c147c726SHong Zhang                                      &lu->Glu,&lu->mem_usage, &lu->stat, &sinfo));
3145a46d3faSBarry Smith #endif
3153cb270beSHong Zhang #endif
316d5f3da31SBarry Smith   } else if (F->factortype == MAT_FACTOR_ILU) {
317cffbb591SHong Zhang     /* Compute the incomplete factorization, condition number and pivot growth */
318cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
3193cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
320d387c056SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
3213cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
3225db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3233cb270beSHong Zhang #else
324d387c056SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
325cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
3265db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3273cb270beSHong Zhang #endif
3283cb270beSHong Zhang #else
3293cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
330d387c056SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3313cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
3325db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
333cffbb591SHong Zhang #else
334d387c056SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
335cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
336c147c726SHong Zhang                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
337cffbb591SHong Zhang #endif
3383cb270beSHong Zhang #endif
339f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
3405a46d3faSBarry Smith   if (!sinfo || sinfo == lu->A.ncol+1) {
3412205254eSKarl Rupp     if (lu->options.PivotGrowth) {
342675d1226SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);CHKERRQ(ierr);
3432205254eSKarl Rupp     }
3442205254eSKarl Rupp     if (lu->options.ConditionNumber) {
345675d1226SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);CHKERRQ(ierr);
3462205254eSKarl Rupp     }
3475a46d3faSBarry Smith   } else if (sinfo > 0) {
348675d1226SHong Zhang     if (A->erroriffailure) {
349675d1226SHong Zhang       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
350675d1226SHong Zhang     } else {
351675d1226SHong Zhang       if (sinfo <= lu->A.ncol) {
352675d1226SHong Zhang         if (lu->options.ILU_FillTol == 0.0) {
353603e8f96SBarry Smith           F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
354675d1226SHong Zhang         }
355675d1226SHong Zhang         ierr = PetscInfo2(F,"Number of zero pivots %D, ILU_FillTol %g\n",sinfo,lu->options.ILU_FillTol);CHKERRQ(ierr);
356675d1226SHong Zhang       } else if (sinfo == lu->A.ncol + 1) {
357675d1226SHong Zhang         /*
358675d1226SHong Zhang          U is nonsingular, but RCOND is less than machine
359675d1226SHong Zhang  		      precision, meaning that the matrix is singular to
360675d1226SHong Zhang  		      working precision. Nevertheless, the solution and
361675d1226SHong Zhang  		      error bounds are computed because there are a number
362675d1226SHong Zhang  		      of situations where the computed solution can be more
363675d1226SHong Zhang  		      accurate than the value of RCOND would suggest.
364675d1226SHong Zhang          */
365675d1226SHong Zhang         ierr = PetscInfo1(F,"Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %D",sinfo);CHKERRQ(ierr);
366675d1226SHong Zhang       } else { /* sinfo > lu->A.ncol + 1 */
367603e8f96SBarry Smith         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
368675d1226SHong Zhang         ierr = PetscInfo1(F,"Number of bytes allocated when memory allocation fails %D\n",sinfo);CHKERRQ(ierr);
369675d1226SHong Zhang       }
370675d1226SHong Zhang     }
371f23aa3ddSBarry Smith   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
3725a46d3faSBarry Smith 
3735a46d3faSBarry Smith   if (lu->options.PrintStat) {
374675d1226SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");CHKERRQ(ierr);
375d387c056SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
3765a46d3faSBarry Smith     Lstore = (SCformat*) lu->L.Store;
3775a46d3faSBarry Smith     Ustore = (NCformat*) lu->U.Store;
3785a46d3faSBarry Smith     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
3795a46d3faSBarry Smith     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
3805a46d3faSBarry Smith     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
3816da386baSHong Zhang     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
3826da386baSHong Zhang                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
3835a46d3faSBarry Smith   }
3845a46d3faSBarry Smith 
3855a46d3faSBarry Smith   lu->flg                = SAME_NONZERO_PATTERN;
3861d5ca7f3SHong Zhang   F->ops->solve          = MatSolve_SuperLU;
3871d5ca7f3SHong Zhang   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
3881aef8b4cSStefano Zampini   F->ops->matsolve       = NULL;
3895a46d3faSBarry Smith   PetscFunctionReturn(0);
3905a46d3faSBarry Smith }
3915a46d3faSBarry Smith 
392245fece9SBarry Smith static PetscErrorCode MatDestroy_SuperLU(Mat A)
39314b396bbSKris Buschelman {
394dfbe8321SBarry Smith   PetscErrorCode ierr;
395245fece9SBarry Smith   Mat_SuperLU    *lu=(Mat_SuperLU*)A->data;
39614b396bbSKris Buschelman 
39714b396bbSKris Buschelman   PetscFunctionBegin;
398245fece9SBarry Smith   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
399d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
400d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
401d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
402d387c056SBarry Smith     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
4030e742b69SHong Zhang     if (lu->lwork >= 0) {
404d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
405d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
4060e742b69SHong Zhang     }
4070e742b69SHong Zhang   }
4089ce50919SHong Zhang   ierr = PetscFree(lu->etree);CHKERRQ(ierr);
4099ce50919SHong Zhang   ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
4109ce50919SHong Zhang   ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
4119ce50919SHong Zhang   ierr = PetscFree(lu->R);CHKERRQ(ierr);
4129ce50919SHong Zhang   ierr = PetscFree(lu->C);CHKERRQ(ierr);
413bf0cc555SLisandro Dalcin   ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
414bf0cc555SLisandro Dalcin   ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
415245fece9SBarry Smith   ierr = PetscFree(A->data);CHKERRQ(ierr);
4160e742b69SHong Zhang 
417d954db57SHong Zhang   /* clear composed functions */
418bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
419bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr);
42014b396bbSKris Buschelman   PetscFunctionReturn(0);
42114b396bbSKris Buschelman }
42214b396bbSKris Buschelman 
423245fece9SBarry Smith static PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
42414b396bbSKris Buschelman {
425dfbe8321SBarry Smith   PetscErrorCode    ierr;
426ace3abfcSBarry Smith   PetscBool         iascii;
42714b396bbSKris Buschelman   PetscViewerFormat format;
42814b396bbSKris Buschelman 
42914b396bbSKris Buschelman   PetscFunctionBegin;
430251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
43132077d6dSBarry Smith   if (iascii) {
43214b396bbSKris Buschelman     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
4332f59403fSHong Zhang     if (format == PETSC_VIEWER_ASCII_INFO) {
434*860c79edSBarry Smith       ierr = MatView_Info_SuperLU(A,viewer);CHKERRQ(ierr);
43514b396bbSKris Buschelman     }
43614b396bbSKris Buschelman   }
43714b396bbSKris Buschelman   PetscFunctionReturn(0);
43814b396bbSKris Buschelman }
43914b396bbSKris Buschelman 
440e0b74bf9SHong Zhang PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
441e0b74bf9SHong Zhang {
442245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)A->data;
443cd723cd1SBarry Smith   PetscBool      flg;
444cd723cd1SBarry Smith   PetscErrorCode ierr;
445e0b74bf9SHong Zhang 
446e0b74bf9SHong Zhang   PetscFunctionBegin;
4470298fd71SBarry Smith   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
448ce94432eSBarry Smith   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
4490298fd71SBarry Smith   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
450ce94432eSBarry Smith   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
4512205254eSKarl Rupp   lu->options.Trans = TRANS;
452e0b74bf9SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
453e0b74bf9SHong Zhang   PetscFunctionReturn(0);
454e0b74bf9SHong Zhang }
455e0b74bf9SHong Zhang 
45614b396bbSKris Buschelman /*
45714b396bbSKris Buschelman    Note the r permutation is ignored
45814b396bbSKris Buschelman */
459245fece9SBarry Smith static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
46014b396bbSKris Buschelman {
461245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU*)(F->data);
462b24902e0SBarry Smith 
463b24902e0SBarry Smith   PetscFunctionBegin;
464b24902e0SBarry Smith   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
465b24902e0SBarry Smith   lu->CleanUpSuperLU      = PETSC_TRUE;
4661d5ca7f3SHong Zhang   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
467b24902e0SBarry Smith   PetscFunctionReturn(0);
468b24902e0SBarry Smith }
469b24902e0SBarry Smith 
470b2573a8aSBarry Smith static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
4715ccb76cbSHong Zhang {
472245fece9SBarry Smith   Mat_SuperLU *lu= (Mat_SuperLU*)F->data;
4735ccb76cbSHong Zhang 
4745ccb76cbSHong Zhang   PetscFunctionBegin;
4755ccb76cbSHong Zhang   lu->options.ILU_DropTol = dtol;
4765ccb76cbSHong Zhang   PetscFunctionReturn(0);
4775ccb76cbSHong Zhang }
4785ccb76cbSHong Zhang 
4795ccb76cbSHong Zhang /*@
4805ccb76cbSHong Zhang   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
4815ccb76cbSHong Zhang    Logically Collective on Mat
4825ccb76cbSHong Zhang 
4835ccb76cbSHong Zhang    Input Parameters:
4845ccb76cbSHong Zhang +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
4855ccb76cbSHong Zhang -  dtol - drop tolerance
4865ccb76cbSHong Zhang 
4875ccb76cbSHong Zhang   Options Database:
4885ccb76cbSHong Zhang .   -mat_superlu_ilu_droptol <dtol>
4895ccb76cbSHong Zhang 
4905ccb76cbSHong Zhang    Level: beginner
4915ccb76cbSHong Zhang 
49296a0c994SBarry Smith    References:
49396a0c994SBarry Smith .      SuperLU Users' Guide
4945ccb76cbSHong Zhang 
4955ccb76cbSHong Zhang .seealso: MatGetFactor()
4965ccb76cbSHong Zhang @*/
4975ccb76cbSHong Zhang PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
4985ccb76cbSHong Zhang {
4995ccb76cbSHong Zhang   PetscErrorCode ierr;
5005ccb76cbSHong Zhang 
5015ccb76cbSHong Zhang   PetscFunctionBegin;
5025ccb76cbSHong Zhang   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
50369fbec6eSBarry Smith   PetscValidLogicalCollectiveReal(F,dtol,2);
5045ccb76cbSHong Zhang   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
5055ccb76cbSHong Zhang   PetscFunctionReturn(0);
5065ccb76cbSHong Zhang }
5075ccb76cbSHong Zhang 
50835bd34faSBarry Smith PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
50935bd34faSBarry Smith {
51035bd34faSBarry Smith   PetscFunctionBegin;
5112692d6eeSBarry Smith   *type = MATSOLVERSUPERLU;
51235bd34faSBarry Smith   PetscFunctionReturn(0);
51335bd34faSBarry Smith }
51435bd34faSBarry Smith 
515b24902e0SBarry Smith /*MC
516ba992d64SSatish Balay   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
517b24902e0SBarry Smith   via the external package SuperLU.
518b24902e0SBarry Smith 
519e2e64c6bSBarry Smith   Use ./configure --download-superlu to have PETSc installed with SuperLU
520b24902e0SBarry Smith 
521de5a416aSHong Zhang   Use -pc_type lu -pc_factor_mat_solver_package superlu to use this direct solver
522c2b89b5dSBarry Smith 
523b24902e0SBarry Smith   Options Database Keys:
524e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE>            - Equil (None)
525e08999f5SMatthew G Knepley . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
526e08999f5SMatthew G Knepley . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
527e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
528e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
529e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
530e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
531e08999f5SMatthew G Knepley . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
532e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
533e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE>        - PrintStat (None)
534e08999f5SMatthew G Knepley . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
535e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
536e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
537e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
538e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
539e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
540e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
541b24902e0SBarry Smith 
5422692d6eeSBarry Smith    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
5435c9eb25fSBarry Smith 
544b24902e0SBarry Smith    Level: beginner
545b24902e0SBarry Smith 
546d45987f3SHong Zhang .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
547b24902e0SBarry Smith M*/
548b24902e0SBarry Smith 
549db87b0f2SBarry Smith static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
550b24902e0SBarry Smith {
55114b396bbSKris Buschelman   Mat            B;
552f0c56d0fSKris Buschelman   Mat_SuperLU    *lu;
5536849ba73SBarry Smith   PetscErrorCode ierr;
5545d8b2efaSHong Zhang   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
5558afaa268SBarry Smith   PetscBool      flg,set;
5563cb270beSHong Zhang   PetscReal      real_input;
5575ca28756SHong Zhang   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
5585ca28756SHong Zhang   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
5595ca28756SHong Zhang   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
56014b396bbSKris Buschelman 
56114b396bbSKris Buschelman   PetscFunctionBegin;
562ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
563d0f46423SBarry Smith   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
564245fece9SBarry Smith   ierr = PetscStrallocpy("superlu",&((PetscObject)B)->type_name);CHKERRQ(ierr);
565245fece9SBarry Smith   ierr = MatSetUp(B);CHKERRQ(ierr);
566cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
567b24902e0SBarry Smith     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
568cffbb591SHong Zhang     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
569b3a44c85SBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
570cffbb591SHong Zhang 
57100c67f3bSHong Zhang   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
57200c67f3bSHong Zhang   ierr = PetscStrallocpy(MATSOLVERSUPERLU,&B->solvertype);CHKERRQ(ierr);
57300c67f3bSHong Zhang 
574b9c12af5SBarry Smith   B->ops->getinfo     = MatGetInfo_External;
575b24902e0SBarry Smith   B->ops->destroy     = MatDestroy_SuperLU;
5763519fcdcSHong Zhang   B->ops->view        = MatView_SuperLU;
577d5f3da31SBarry Smith   B->factortype       = ftype;
57894b7f48cSBarry Smith   B->assembled        = PETSC_TRUE;           /* required by -ksp_view */
5795c9eb25fSBarry Smith   B->preallocated     = PETSC_TRUE;
58014b396bbSKris Buschelman 
581b00a9115SJed Brown   ierr = PetscNewLog(B,&lu);CHKERRQ(ierr);
582cae5a23dSHong Zhang 
583cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU) {
5849ce50919SHong Zhang     set_default_options(&lu->options);
5853d6581fbSHong Zhang     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
5863d6581fbSHong Zhang       "Whether or not the system will be equilibrated depends on the
5873d6581fbSHong Zhang        scaling of the matrix A, but if equilibration is used, A is
5883d6581fbSHong Zhang        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
5893d6581fbSHong Zhang        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
5903d6581fbSHong Zhang      We set 'options.Equil = NO' as default because additional space is needed for it.
5913d6581fbSHong Zhang     */
5923d6581fbSHong Zhang     lu->options.Equil = NO;
593cffbb591SHong Zhang   } else if (ftype == MAT_FACTOR_ILU) {
5940c74a584SJed Brown     /* Set the default input options of ilu: */
595d387c056SBarry Smith     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
596cffbb591SHong Zhang   }
5979ce50919SHong Zhang   lu->options.PrintStat = NO;
5981a2e2f44SHong Zhang 
5995d8b2efaSHong Zhang   /* Initialize the statistics variables. */
600d387c056SBarry Smith   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
6018914a3f7SHong Zhang   lu->lwork = 0;   /* allocate space internally by system malloc */
6029ce50919SHong Zhang 
603ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
6048afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,NULL);CHKERRQ(ierr);
6058914a3f7SHong Zhang   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
6062205254eSKarl Rupp   if (flg) lu->options.ColPerm = (colperm_t)indx;
6078914a3f7SHong Zhang   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
6082205254eSKarl Rupp   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
6098afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,&set);CHKERRQ(ierr);
6108afaa268SBarry Smith   if (set && flg) lu->options.SymmetricMode = YES;
6113cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
6123cb270beSHong Zhang   if (flg) lu->options.DiagPivotThresh = (double) real_input;
6138afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,&set);CHKERRQ(ierr);
6148afaa268SBarry Smith   if (set && flg) lu->options.PivotGrowth = YES;
6158afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,&set);CHKERRQ(ierr);
6168afaa268SBarry Smith   if (set && flg) lu->options.ConditionNumber = YES;
617d7ebd59bSHong Zhang   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
6182205254eSKarl Rupp   if (flg) lu->options.RowPerm = (rowperm_t)indx;
6198afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,&set);CHKERRQ(ierr);
6208afaa268SBarry Smith   if (set && flg) lu->options.ReplaceTinyPivot = YES;
6218afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,&set);CHKERRQ(ierr);
6228afaa268SBarry Smith   if (set && flg) lu->options.PrintStat = YES;
6230298fd71SBarry Smith   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
6245fe6150dSHong Zhang   if (lu->lwork > 0) {
625d87de817SBarry Smith     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
6265fe6150dSHong Zhang     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
6275fe6150dSHong Zhang   } else if (lu->lwork != 0 && lu->lwork != -1) {
62877431f27SBarry Smith     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
6298914a3f7SHong Zhang     lu->lwork = 0;
6308914a3f7SHong Zhang   }
631cffbb591SHong Zhang   /* ilu options */
6323cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
6333cb270beSHong Zhang   if (flg) lu->options.ILU_DropTol = (double) real_input;
6343cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
6353cb270beSHong Zhang   if (flg) lu->options.ILU_FillTol = (double) real_input;
6363cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
6373cb270beSHong Zhang   if (flg) lu->options.ILU_FillFactor = (double) real_input;
6380298fd71SBarry Smith   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
639cffbb591SHong Zhang   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
6402205254eSKarl Rupp   if (flg) lu->options.ILU_Norm = (norm_t)indx;
641cffbb591SHong Zhang   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
6422205254eSKarl Rupp   if (flg) lu->options.ILU_MILU = (milu_t)indx;
643245fece9SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
64438abfdaeSHong Zhang   if (lu->options.Equil == YES) {
64538abfdaeSHong Zhang     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
64638abfdaeSHong Zhang     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
64738abfdaeSHong Zhang   }
6489ce50919SHong Zhang 
6495d8b2efaSHong Zhang   /* Allocate spaces (notice sizes are for the transpose) */
650785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->etree);CHKERRQ(ierr);
651785e854fSJed Brown   ierr = PetscMalloc1(n,&lu->perm_r);CHKERRQ(ierr);
652785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->perm_c);CHKERRQ(ierr);
653785e854fSJed Brown   ierr = PetscMalloc1(n,&lu->R);CHKERRQ(ierr);
654785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->C);CHKERRQ(ierr);
6555d8b2efaSHong Zhang 
6565d8b2efaSHong Zhang   /* create rhs and solution x without allocate space for .Store */
6575d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX)
6583cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
659d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
660d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
6613cb270beSHong Zhang #else
662d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
663d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
6643cb270beSHong Zhang #endif
6653cb270beSHong Zhang #else
6663cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
667d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
668d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
6695d8b2efaSHong Zhang #else
670d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
671d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
6725d8b2efaSHong Zhang #endif
6733cb270beSHong Zhang #endif
6745d8b2efaSHong Zhang 
675bdf89e91SBarry Smith   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
676bdf89e91SBarry Smith   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
677245fece9SBarry Smith   B->data  = lu;
67820be8e61SHong Zhang 
679899d7b4fSKris Buschelman   *F       = B;
68014b396bbSKris Buschelman   PetscFunctionReturn(0);
68114b396bbSKris Buschelman }
682d954db57SHong Zhang 
68329b38603SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_SuperLU(void)
68442c9c57cSBarry Smith {
68542c9c57cSBarry Smith   PetscErrorCode ierr;
68642c9c57cSBarry Smith 
68742c9c57cSBarry Smith   PetscFunctionBegin;
68842c9c57cSBarry Smith   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_LU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
68942c9c57cSBarry Smith   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_ILU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
69042c9c57cSBarry Smith   PetscFunctionReturn(0);
69142c9c57cSBarry Smith }
692