xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 26cc229b24afffee8e9a203ddc8754b40d660fc7)
114b396bbSKris Buschelman 
25a46d3faSBarry Smith /*  --------------------------------------------------------------------
35a46d3faSBarry Smith 
45a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
5c2b89b5dSBarry Smith      the SuperLU sparse solver.
614b396bbSKris Buschelman */
714b396bbSKris Buschelman 
85a46d3faSBarry Smith /*
95a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
105a46d3faSBarry Smith */
11c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
1214b396bbSKris Buschelman 
135a46d3faSBarry Smith /*
145a46d3faSBarry Smith      SuperLU include files
155a46d3faSBarry Smith */
1614b396bbSKris Buschelman EXTERN_C_BEGIN
1714b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
183cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
193cb270beSHong Zhang #include <slu_cdefs.h>
203cb270beSHong Zhang #else
21c6db04a5SJed Brown #include <slu_zdefs.h>
223cb270beSHong Zhang #endif
233cb270beSHong Zhang #else
243cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
253cb270beSHong Zhang #include <slu_sdefs.h>
2614b396bbSKris Buschelman #else
27c6db04a5SJed Brown #include <slu_ddefs.h>
2814b396bbSKris Buschelman #endif
293cb270beSHong Zhang #endif
30c6db04a5SJed Brown #include <slu_util.h>
3114b396bbSKris Buschelman EXTERN_C_END
3214b396bbSKris Buschelman 
335a46d3faSBarry Smith /*
34245fece9SBarry Smith      This is the data that defines the SuperLU factored matrix type
355a46d3faSBarry Smith */
3614b396bbSKris Buschelman typedef struct {
3775af56d4SHong Zhang   SuperMatrix       A,L,U,B,X;
3875af56d4SHong Zhang   superlu_options_t options;
39da7a1d00SHong Zhang   PetscInt          *perm_c; /* column permutation vector */
40da7a1d00SHong Zhang   PetscInt          *perm_r; /* row permutations from partial pivoting */
41da7a1d00SHong Zhang   PetscInt          *etree;
42da7a1d00SHong Zhang   PetscReal         *R, *C;
4375af56d4SHong Zhang   char              equed[1];
44da7a1d00SHong Zhang   PetscInt          lwork;
4575af56d4SHong Zhang   void              *work;
46da7a1d00SHong Zhang   PetscReal         rpg, rcond;
4775af56d4SHong Zhang   mem_usage_t       mem_usage;
4875af56d4SHong Zhang   MatStructure      flg;
495d8b2efaSHong Zhang   SuperLUStat_t     stat;
50cae5a23dSHong Zhang   Mat               A_dup;
51cae5a23dSHong Zhang   PetscScalar       *rhs_dup;
52c147c726SHong Zhang   GlobalLU_t        Glu;
532366e350SStefano Zampini   PetscBool         needconversion;
5414b396bbSKris Buschelman 
5514b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
56ace3abfcSBarry Smith   PetscBool CleanUpSuperLU;
57f0c56d0fSKris Buschelman } Mat_SuperLU;
5814b396bbSKris Buschelman 
595a46d3faSBarry Smith /*
605a46d3faSBarry Smith     Utility function
615a46d3faSBarry Smith */
62860c79edSBarry Smith static PetscErrorCode MatView_Info_SuperLU(Mat A,PetscViewer viewer)
635a46d3faSBarry Smith {
64245fece9SBarry Smith   Mat_SuperLU       *lu= (Mat_SuperLU*)A->data;
655a46d3faSBarry Smith   superlu_options_t options;
665a46d3faSBarry Smith 
675a46d3faSBarry Smith   PetscFunctionBegin;
685a46d3faSBarry Smith   options = lu->options;
692205254eSKarl Rupp 
709566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n"));
719566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO"));
729566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  ColPerm: %" PetscInt_FMT "\n",options.ColPerm));
739566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  IterRefine: %" PetscInt_FMT "\n",options.IterRefine));
749566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO"));
759566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh));
769566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO"));
779566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO"));
789566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  RowPerm: %" PetscInt_FMT "\n",options.RowPerm));
799566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO"));
809566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO"));
819566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer,"  lwork: %" PetscInt_FMT "\n",lu->lwork));
82d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_ILU) {
839566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol));
849566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol));
859566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor));
869566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %" PetscInt_FMT "\n",options.ILU_DropRule));
879566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %" PetscInt_FMT "\n",options.ILU_Norm));
889566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %" PetscInt_FMT "\n",options.ILU_MILU));
89cffbb591SHong Zhang   }
905a46d3faSBarry Smith   PetscFunctionReturn(0);
915a46d3faSBarry Smith }
925a46d3faSBarry Smith 
93245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
94245fece9SBarry Smith {
95245fece9SBarry Smith   Mat_SuperLU       *lu = (Mat_SuperLU*)A->data;
96245fece9SBarry Smith   const PetscScalar *barray;
97245fece9SBarry Smith   PetscScalar       *xarray;
98245fece9SBarry Smith   PetscInt          info,i,n;
99245fece9SBarry Smith   PetscReal         ferr,berr;
100245fece9SBarry Smith   static PetscBool  cite = PETSC_FALSE;
101245fece9SBarry Smith 
102245fece9SBarry Smith   PetscFunctionBegin;
103245fece9SBarry Smith   if (lu->lwork == -1) PetscFunctionReturn(0);
1049566063dSJacob Faibussowitsch   PetscCall(PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",&cite));
105245fece9SBarry Smith 
1069566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(x,&n));
107245fece9SBarry Smith   lu->B.ncol = 1;   /* Set the number of right-hand side */
108245fece9SBarry Smith   if (lu->options.Equil && !lu->rhs_dup) {
109245fece9SBarry Smith     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
1109566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n,&lu->rhs_dup));
111245fece9SBarry Smith   }
112245fece9SBarry Smith   if (lu->options.Equil) {
113245fece9SBarry Smith     /* Copy b into rsh_dup */
1149566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(b,&barray));
1159566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(lu->rhs_dup,barray,n));
1169566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(b,&barray));
117245fece9SBarry Smith     barray = lu->rhs_dup;
118245fece9SBarry Smith   } else {
1199566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(b,&barray));
120245fece9SBarry Smith   }
1219566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x,&xarray));
122245fece9SBarry Smith 
123245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
124245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
125245fece9SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
126245fece9SBarry Smith   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
127245fece9SBarry Smith #else
128245fece9SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
129245fece9SBarry Smith   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
130245fece9SBarry Smith #endif
131245fece9SBarry Smith #else
132245fece9SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (void*)barray;
133245fece9SBarry Smith   ((DNformat*)lu->X.Store)->nzval = xarray;
134245fece9SBarry Smith #endif
135245fece9SBarry Smith 
136245fece9SBarry Smith   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
137245fece9SBarry Smith   if (A->factortype == MAT_FACTOR_LU) {
138245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
139245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
140245fece9SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
141245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
142245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
143245fece9SBarry Smith #else
144245fece9SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
145245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
146245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
147245fece9SBarry Smith #endif
148245fece9SBarry Smith #else
149245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
150245fece9SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
151245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
152245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
153245fece9SBarry Smith #else
154245fece9SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
155245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
156245fece9SBarry Smith                                      &lu->Glu,&lu->mem_usage, &lu->stat, &info));
157245fece9SBarry Smith #endif
158245fece9SBarry Smith #endif
159245fece9SBarry Smith   } else if (A->factortype == MAT_FACTOR_ILU) {
160245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
161245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
162245fece9SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
163245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
164245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
165245fece9SBarry Smith #else
166245fece9SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
167245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
168245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
169245fece9SBarry Smith #endif
170245fece9SBarry Smith #else
171245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
172245fece9SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
173245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
174245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
175245fece9SBarry Smith #else
176245fece9SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
177245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
178245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
179245fece9SBarry Smith #endif
180245fece9SBarry Smith #endif
181245fece9SBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
182245fece9SBarry Smith   if (!lu->options.Equil) {
1839566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(b,&barray));
184245fece9SBarry Smith   }
1859566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x,&xarray));
186245fece9SBarry Smith 
187245fece9SBarry Smith   if (!info || info == lu->A.ncol+1) {
188245fece9SBarry Smith     if (lu->options.IterRefine) {
1899566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n"));
1909566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"));
1919566063dSJacob Faibussowitsch       for (i = 0; i < 1; ++i) PetscCall(PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr));
192245fece9SBarry Smith     }
193245fece9SBarry Smith   } else if (info > 0) {
194245fece9SBarry Smith     if (lu->lwork == -1) {
1959566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %" PetscInt_FMT " bytes\n", info - lu->A.ncol));
196245fece9SBarry Smith     } else {
1979566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %" PetscInt_FMT "\n",info));
198245fece9SBarry Smith     }
1995f80ce2aSJacob Faibussowitsch   } else PetscCheck(info >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", info,-info);
200245fece9SBarry Smith 
201245fece9SBarry Smith   if (lu->options.PrintStat) {
2029566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n"));
203245fece9SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
204245fece9SBarry Smith   }
205245fece9SBarry Smith   PetscFunctionReturn(0);
206245fece9SBarry Smith }
207245fece9SBarry Smith 
208245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
209245fece9SBarry Smith {
210245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)A->data;
211245fece9SBarry Smith 
212245fece9SBarry Smith   PetscFunctionBegin;
213603e8f96SBarry Smith   if (A->factorerrortype) {
2149566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n"));
2159566063dSJacob Faibussowitsch     PetscCall(VecSetInf(x));
216245fece9SBarry Smith     PetscFunctionReturn(0);
217245fece9SBarry Smith   }
218245fece9SBarry Smith 
219245fece9SBarry Smith   lu->options.Trans = TRANS;
2209566063dSJacob Faibussowitsch   PetscCall(MatSolve_SuperLU_Private(A,b,x));
221245fece9SBarry Smith   PetscFunctionReturn(0);
222245fece9SBarry Smith }
223245fece9SBarry Smith 
224245fece9SBarry Smith PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
225245fece9SBarry Smith {
226245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)A->data;
227245fece9SBarry Smith 
228245fece9SBarry Smith   PetscFunctionBegin;
229603e8f96SBarry Smith   if (A->factorerrortype) {
2309566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n"));
2319566063dSJacob Faibussowitsch     PetscCall(VecSetInf(x));
232245fece9SBarry Smith     PetscFunctionReturn(0);
233245fece9SBarry Smith   }
234245fece9SBarry Smith 
235245fece9SBarry Smith   lu->options.Trans = NOTRANS;
2369566063dSJacob Faibussowitsch   PetscCall(MatSolve_SuperLU_Private(A,b,x));
237245fece9SBarry Smith   PetscFunctionReturn(0);
238245fece9SBarry Smith }
239245fece9SBarry Smith 
240245fece9SBarry Smith static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
2415a46d3faSBarry Smith {
242245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)F->data;
243cae5a23dSHong Zhang   Mat_SeqAIJ     *aa;
2445a46d3faSBarry Smith   PetscInt       sinfo;
2455a46d3faSBarry Smith   PetscReal      ferr, berr;
2465a46d3faSBarry Smith   NCformat       *Ustore;
2475a46d3faSBarry Smith   SCformat       *Lstore;
2485a46d3faSBarry Smith 
2495a46d3faSBarry Smith   PetscFunctionBegin;
2505a46d3faSBarry Smith   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
2515a46d3faSBarry Smith     lu->options.Fact = SamePattern;
2525a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
2535a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
2542366e350SStefano Zampini     if (lu->A_dup) {
2559566063dSJacob Faibussowitsch       PetscCall(MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN));
256cae5a23dSHong Zhang     }
2572366e350SStefano Zampini 
2585a46d3faSBarry Smith     if (lu->lwork >= 0) {
259d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
260d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
2615a46d3faSBarry Smith       lu->options.Fact = SamePattern;
2625a46d3faSBarry Smith     }
2635a46d3faSBarry Smith   }
2645a46d3faSBarry Smith 
2655a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
2665a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
2675a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
2682366e350SStefano Zampini   if (lu->A_dup) {
269cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
270cae5a23dSHong Zhang   } else {
271cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(A)->data;
272cae5a23dSHong Zhang   }
2735a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2743cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
275d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
2763cb270beSHong Zhang #else
277d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
2783cb270beSHong Zhang #endif
2793cb270beSHong Zhang #else
2803cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
281d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
2825a46d3faSBarry Smith #else
283d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
2845a46d3faSBarry Smith #endif
2853cb270beSHong Zhang #endif
2865a46d3faSBarry Smith 
2875a46d3faSBarry Smith   /* Numerical factorization */
2885a46d3faSBarry Smith   lu->B.ncol = 0;  /* Indicate not to solve the system */
289d5f3da31SBarry Smith   if (F->factortype == MAT_FACTOR_LU) {
2905a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2913cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
292d387c056SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
2933cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
2945db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2953cb270beSHong Zhang #else
296d387c056SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
2975a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
2985db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2993cb270beSHong Zhang #endif
3003cb270beSHong Zhang #else
3013cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
302d387c056SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3033cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
3045db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3055a46d3faSBarry Smith #else
306d387c056SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3075a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
308c147c726SHong Zhang                                      &lu->Glu,&lu->mem_usage, &lu->stat, &sinfo));
3095a46d3faSBarry Smith #endif
3103cb270beSHong Zhang #endif
311d5f3da31SBarry Smith   } else if (F->factortype == MAT_FACTOR_ILU) {
312cffbb591SHong Zhang     /* Compute the incomplete factorization, condition number and pivot growth */
313cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
3143cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
315d387c056SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
3163cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
3175db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3183cb270beSHong Zhang #else
319d387c056SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
320cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
3215db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3223cb270beSHong Zhang #endif
3233cb270beSHong Zhang #else
3243cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
325d387c056SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3263cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
3275db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
328cffbb591SHong Zhang #else
329d387c056SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
330cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
331c147c726SHong Zhang                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
332cffbb591SHong Zhang #endif
3333cb270beSHong Zhang #endif
334f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
3355a46d3faSBarry Smith   if (!sinfo || sinfo == lu->A.ncol+1) {
3362205254eSKarl Rupp     if (lu->options.PivotGrowth) {
3379566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg));
3382205254eSKarl Rupp     }
3392205254eSKarl Rupp     if (lu->options.ConditionNumber) {
3409566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond));
3412205254eSKarl Rupp     }
3425a46d3faSBarry Smith   } else if (sinfo > 0) {
343675d1226SHong Zhang     if (A->erroriffailure) {
34498921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %" PetscInt_FMT,sinfo);
345675d1226SHong Zhang     } else {
346675d1226SHong Zhang       if (sinfo <= lu->A.ncol) {
347675d1226SHong Zhang         if (lu->options.ILU_FillTol == 0.0) {
348603e8f96SBarry Smith           F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
349675d1226SHong Zhang         }
3509566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F,"Number of zero pivots %" PetscInt_FMT ", ILU_FillTol %g\n",sinfo,lu->options.ILU_FillTol));
351675d1226SHong Zhang       } else if (sinfo == lu->A.ncol + 1) {
352675d1226SHong Zhang         /*
353675d1226SHong Zhang          U is nonsingular, but RCOND is less than machine
354675d1226SHong Zhang                       precision, meaning that the matrix is singular to
355675d1226SHong Zhang                       working precision. Nevertheless, the solution and
356675d1226SHong Zhang                       error bounds are computed because there are a number
357675d1226SHong Zhang                       of situations where the computed solution can be more
358675d1226SHong Zhang                       accurate than the value of RCOND would suggest.
359675d1226SHong Zhang          */
3609566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F,"Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %" PetscInt_FMT,sinfo));
361675d1226SHong Zhang       } else { /* sinfo > lu->A.ncol + 1 */
362603e8f96SBarry Smith         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
3639566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F,"Number of bytes allocated when memory allocation fails %" PetscInt_FMT "\n",sinfo));
364675d1226SHong Zhang       }
365675d1226SHong Zhang     }
36698921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", sinfo,-sinfo);
3675a46d3faSBarry Smith 
3685a46d3faSBarry Smith   if (lu->options.PrintStat) {
3699566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n"));
370d387c056SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
3715a46d3faSBarry Smith     Lstore = (SCformat*) lu->L.Store;
3725a46d3faSBarry Smith     Ustore = (NCformat*) lu->U.Store;
3739566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %" PetscInt_FMT "\n", Lstore->nnz));
3749566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %" PetscInt_FMT "\n", Ustore->nnz));
3759566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %" PetscInt_FMT "\n", Lstore->nnz + Ustore->nnz - lu->A.ncol));
3769566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6));
3775a46d3faSBarry Smith   }
3785a46d3faSBarry Smith 
3795a46d3faSBarry Smith   lu->flg                = SAME_NONZERO_PATTERN;
3801d5ca7f3SHong Zhang   F->ops->solve          = MatSolve_SuperLU;
3811d5ca7f3SHong Zhang   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
3821aef8b4cSStefano Zampini   F->ops->matsolve       = NULL;
3835a46d3faSBarry Smith   PetscFunctionReturn(0);
3845a46d3faSBarry Smith }
3855a46d3faSBarry Smith 
386245fece9SBarry Smith static PetscErrorCode MatDestroy_SuperLU(Mat A)
38714b396bbSKris Buschelman {
388245fece9SBarry Smith   Mat_SuperLU    *lu=(Mat_SuperLU*)A->data;
38914b396bbSKris Buschelman 
39014b396bbSKris Buschelman   PetscFunctionBegin;
391245fece9SBarry Smith   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
392d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
393d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
394d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
395d387c056SBarry Smith     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
3960e742b69SHong Zhang     if (lu->lwork >= 0) {
397d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
398d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
3990e742b69SHong Zhang     }
4000e742b69SHong Zhang   }
4019566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->etree));
4029566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->perm_r));
4039566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->perm_c));
4049566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->R));
4059566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->C));
4069566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->rhs_dup));
4079566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&lu->A_dup));
4089566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
4090e742b69SHong Zhang 
410d954db57SHong Zhang   /* clear composed functions */
4119566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL));
4129566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL));
41314b396bbSKris Buschelman   PetscFunctionReturn(0);
41414b396bbSKris Buschelman }
41514b396bbSKris Buschelman 
416245fece9SBarry Smith static PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
41714b396bbSKris Buschelman {
418ace3abfcSBarry Smith   PetscBool         iascii;
41914b396bbSKris Buschelman   PetscViewerFormat format;
42014b396bbSKris Buschelman 
42114b396bbSKris Buschelman   PetscFunctionBegin;
4229566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii));
42332077d6dSBarry Smith   if (iascii) {
4249566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer,&format));
4252f59403fSHong Zhang     if (format == PETSC_VIEWER_ASCII_INFO) {
4269566063dSJacob Faibussowitsch       PetscCall(MatView_Info_SuperLU(A,viewer));
42714b396bbSKris Buschelman     }
42814b396bbSKris Buschelman   }
42914b396bbSKris Buschelman   PetscFunctionReturn(0);
43014b396bbSKris Buschelman }
43114b396bbSKris Buschelman 
432e0b74bf9SHong Zhang PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
433e0b74bf9SHong Zhang {
434245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)A->data;
435cd723cd1SBarry Smith   PetscBool      flg;
436e0b74bf9SHong Zhang 
437e0b74bf9SHong Zhang   PetscFunctionBegin;
4389566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL));
4395f80ce2aSJacob Faibussowitsch   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
4409566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL));
4415f80ce2aSJacob Faibussowitsch   PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
4422205254eSKarl Rupp   lu->options.Trans = TRANS;
443e0b74bf9SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
444e0b74bf9SHong Zhang   PetscFunctionReturn(0);
445e0b74bf9SHong Zhang }
446e0b74bf9SHong Zhang 
447245fece9SBarry Smith static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
44814b396bbSKris Buschelman {
449245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)(F->data);
450*26cc229bSBarry Smith   PetscInt       indx;
451*26cc229bSBarry Smith   PetscBool      flg,set;
452*26cc229bSBarry Smith   PetscReal      real_input;
453*26cc229bSBarry Smith   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
454*26cc229bSBarry Smith   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
455*26cc229bSBarry Smith   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
456b24902e0SBarry Smith 
457b24902e0SBarry Smith   PetscFunctionBegin;
458*26cc229bSBarry Smith   /* Set options to F */
459*26cc229bSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)F),((PetscObject)F)->prefix,"SuperLU Options","Mat");
460*26cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,NULL));
461*26cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg));
462*26cc229bSBarry Smith   if (flg) lu->options.ColPerm = (colperm_t)indx;
463*26cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg));
464*26cc229bSBarry Smith   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
465*26cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,&set));
466*26cc229bSBarry Smith   if (set && flg) lu->options.SymmetricMode = YES;
467*26cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg));
468*26cc229bSBarry Smith   if (flg) lu->options.DiagPivotThresh = (double) real_input;
469*26cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,&set));
470*26cc229bSBarry Smith   if (set && flg) lu->options.PivotGrowth = YES;
471*26cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,&set));
472*26cc229bSBarry Smith   if (set && flg) lu->options.ConditionNumber = YES;
473*26cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg));
474*26cc229bSBarry Smith   if (flg) lu->options.RowPerm = (rowperm_t)indx;
475*26cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,&set));
476*26cc229bSBarry Smith   if (set && flg) lu->options.ReplaceTinyPivot = YES;
477*26cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,&set));
478*26cc229bSBarry Smith   if (set && flg) lu->options.PrintStat = YES;
479*26cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL));
480*26cc229bSBarry Smith   if (lu->lwork > 0) {
481*26cc229bSBarry Smith     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
482*26cc229bSBarry Smith     PetscCall(PetscMalloc(lu->lwork,&lu->work));
483*26cc229bSBarry Smith   } else if (lu->lwork != 0 && lu->lwork != -1) {
484*26cc229bSBarry Smith     PetscCall(PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %" PetscInt_FMT " is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork));
485*26cc229bSBarry Smith     lu->lwork = 0;
486*26cc229bSBarry Smith   }
487*26cc229bSBarry Smith   /* ilu options */
488*26cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg));
489*26cc229bSBarry Smith   if (flg) lu->options.ILU_DropTol = (double) real_input;
490*26cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg));
491*26cc229bSBarry Smith   if (flg) lu->options.ILU_FillTol = (double) real_input;
492*26cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg));
493*26cc229bSBarry Smith   if (flg) lu->options.ILU_FillFactor = (double) real_input;
494*26cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL));
495*26cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg));
496*26cc229bSBarry Smith   if (flg) lu->options.ILU_Norm = (norm_t)indx;
497*26cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg));
498*26cc229bSBarry Smith   if (flg) lu->options.ILU_MILU = (milu_t)indx;
499*26cc229bSBarry Smith   PetscOptionsEnd();
500*26cc229bSBarry Smith 
501b24902e0SBarry Smith   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
502b24902e0SBarry Smith   lu->CleanUpSuperLU      = PETSC_TRUE;
5031d5ca7f3SHong Zhang   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
5042366e350SStefano Zampini 
5052366e350SStefano Zampini   /* if we are here, the nonzero pattern has changed unless the user explicitly called MatLUFactorSymbolic */
5069566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&lu->A_dup));
5072366e350SStefano Zampini   if (lu->needconversion) {
5089566063dSJacob Faibussowitsch     PetscCall(MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&lu->A_dup));
5092366e350SStefano Zampini   }
5102366e350SStefano Zampini   if (lu->options.Equil == YES && !lu->A_dup) { /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
5119566063dSJacob Faibussowitsch     PetscCall(MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup));
5122366e350SStefano Zampini   }
513b24902e0SBarry Smith   PetscFunctionReturn(0);
514b24902e0SBarry Smith }
515b24902e0SBarry Smith 
516b2573a8aSBarry Smith static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
5175ccb76cbSHong Zhang {
518245fece9SBarry Smith   Mat_SuperLU *lu= (Mat_SuperLU*)F->data;
5195ccb76cbSHong Zhang 
5205ccb76cbSHong Zhang   PetscFunctionBegin;
5215ccb76cbSHong Zhang   lu->options.ILU_DropTol = dtol;
5225ccb76cbSHong Zhang   PetscFunctionReturn(0);
5235ccb76cbSHong Zhang }
5245ccb76cbSHong Zhang 
5255ccb76cbSHong Zhang /*@
5265ccb76cbSHong Zhang   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
527147403d9SBarry Smith 
5285ccb76cbSHong Zhang    Logically Collective on Mat
5295ccb76cbSHong Zhang 
5305ccb76cbSHong Zhang    Input Parameters:
5315ccb76cbSHong Zhang +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
5325ccb76cbSHong Zhang -  dtol - drop tolerance
5335ccb76cbSHong Zhang 
5345ccb76cbSHong Zhang   Options Database:
535147403d9SBarry Smith .   -mat_superlu_ilu_droptol <dtol> - the drop tolerance
5365ccb76cbSHong Zhang 
5375ccb76cbSHong Zhang    Level: beginner
5385ccb76cbSHong Zhang 
53996a0c994SBarry Smith    References:
540606c0280SSatish Balay .  * - SuperLU Users' Guide
5415ccb76cbSHong Zhang 
542db781477SPatrick Sanan .seealso: `MatGetFactor()`
5435ccb76cbSHong Zhang @*/
5445ccb76cbSHong Zhang PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
5455ccb76cbSHong Zhang {
5465ccb76cbSHong Zhang   PetscFunctionBegin;
5475ccb76cbSHong Zhang   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
54869fbec6eSBarry Smith   PetscValidLogicalCollectiveReal(F,dtol,2);
549cac4c232SBarry Smith   PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));
5505ccb76cbSHong Zhang   PetscFunctionReturn(0);
5515ccb76cbSHong Zhang }
5525ccb76cbSHong Zhang 
553ea799195SBarry Smith PetscErrorCode MatFactorGetSolverType_seqaij_superlu(Mat A,MatSolverType *type)
55435bd34faSBarry Smith {
55535bd34faSBarry Smith   PetscFunctionBegin;
5562692d6eeSBarry Smith   *type = MATSOLVERSUPERLU;
55735bd34faSBarry Smith   PetscFunctionReturn(0);
55835bd34faSBarry Smith }
55935bd34faSBarry Smith 
560b24902e0SBarry Smith /*MC
561ba992d64SSatish Balay   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
562b24902e0SBarry Smith   via the external package SuperLU.
563b24902e0SBarry Smith 
564e2e64c6bSBarry Smith   Use ./configure --download-superlu to have PETSc installed with SuperLU
565b24902e0SBarry Smith 
5663ca39a21SBarry Smith   Use -pc_type lu -pc_factor_mat_solver_type superlu to use this direct solver
567c2b89b5dSBarry Smith 
568b24902e0SBarry Smith   Options Database Keys:
569e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE>            - Equil (None)
570e08999f5SMatthew G Knepley . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
571e08999f5SMatthew G Knepley . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
572e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
573e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
574e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
575e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
576e08999f5SMatthew G Knepley . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
577e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
578e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE>        - PrintStat (None)
579e08999f5SMatthew G Knepley . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
580e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
581e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
582e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
583e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
584e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
585e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
586b24902e0SBarry Smith 
58795452b02SPatrick Sanan    Notes:
58895452b02SPatrick Sanan     Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
5895c9eb25fSBarry Smith 
5902c7c0729SBarry Smith     Cannot currently use ordering provided by PETSc.
5912c7c0729SBarry Smith 
592b24902e0SBarry Smith    Level: beginner
593b24902e0SBarry Smith 
594db781477SPatrick Sanan .seealso: `PCLU`, `PCILU`, `MATSOLVERSUPERLU_DIST`, `MATSOLVERMUMPS`, `PCFactorSetMatSolverType()`, `MatSolverType`
595b24902e0SBarry Smith M*/
596b24902e0SBarry Smith 
597db87b0f2SBarry Smith static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
598b24902e0SBarry Smith {
59914b396bbSKris Buschelman   Mat            B;
600f0c56d0fSKris Buschelman   Mat_SuperLU    *lu;
601*26cc229bSBarry Smith   PetscInt       m=A->rmap->n,n=A->cmap->n;
60214b396bbSKris Buschelman 
60314b396bbSKris Buschelman   PetscFunctionBegin;
6049566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A),&B));
6059566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE));
6069566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("superlu",&((PetscObject)B)->type_name));
6079566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
60866e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
609cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
610b24902e0SBarry Smith     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
611cffbb591SHong Zhang     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
612b3a44c85SBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
613cffbb591SHong Zhang 
6149566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
6159566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSUPERLU,&B->solvertype));
61600c67f3bSHong Zhang 
617b9c12af5SBarry Smith   B->ops->getinfo     = MatGetInfo_External;
618b24902e0SBarry Smith   B->ops->destroy     = MatDestroy_SuperLU;
6193519fcdcSHong Zhang   B->ops->view        = MatView_SuperLU;
620d5f3da31SBarry Smith   B->factortype       = ftype;
62194b7f48cSBarry Smith   B->assembled        = PETSC_TRUE;           /* required by -ksp_view */
6225c9eb25fSBarry Smith   B->preallocated     = PETSC_TRUE;
62314b396bbSKris Buschelman 
6249566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B,&lu));
625cae5a23dSHong Zhang 
626cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU) {
6279ce50919SHong Zhang     set_default_options(&lu->options);
6283d6581fbSHong Zhang     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
6293d6581fbSHong Zhang       "Whether or not the system will be equilibrated depends on the
6303d6581fbSHong Zhang        scaling of the matrix A, but if equilibration is used, A is
6313d6581fbSHong Zhang        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
6323d6581fbSHong Zhang        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
6333d6581fbSHong Zhang      We set 'options.Equil = NO' as default because additional space is needed for it.
6343d6581fbSHong Zhang     */
6353d6581fbSHong Zhang     lu->options.Equil = NO;
636cffbb591SHong Zhang   } else if (ftype == MAT_FACTOR_ILU) {
6370c74a584SJed Brown     /* Set the default input options of ilu: */
638d387c056SBarry Smith     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
639cffbb591SHong Zhang   }
6409ce50919SHong Zhang   lu->options.PrintStat = NO;
6411a2e2f44SHong Zhang 
6425d8b2efaSHong Zhang   /* Initialize the statistics variables. */
643d387c056SBarry Smith   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
6448914a3f7SHong Zhang   lu->lwork = 0;   /* allocate space internally by system malloc */
6459ce50919SHong Zhang 
6465d8b2efaSHong Zhang   /* Allocate spaces (notice sizes are for the transpose) */
6479566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m,&lu->etree));
6489566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n,&lu->perm_r));
6499566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m,&lu->perm_c));
6509566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n,&lu->R));
6519566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m,&lu->C));
6525d8b2efaSHong Zhang 
6535d8b2efaSHong Zhang   /* create rhs and solution x without allocate space for .Store */
6545d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX)
6553cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
656d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
657d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
6583cb270beSHong Zhang #else
659d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
660d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
6613cb270beSHong Zhang #endif
6623cb270beSHong Zhang #else
6633cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
664d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
665d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
6665d8b2efaSHong Zhang #else
667d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
668d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
6695d8b2efaSHong Zhang #endif
6703cb270beSHong Zhang #endif
6715d8b2efaSHong Zhang 
6729566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_seqaij_superlu));
6739566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU));
674245fece9SBarry Smith   B->data  = lu;
67520be8e61SHong Zhang 
676899d7b4fSKris Buschelman   *F       = B;
67714b396bbSKris Buschelman   PetscFunctionReturn(0);
67814b396bbSKris Buschelman }
679d954db57SHong Zhang 
6802366e350SStefano Zampini static PetscErrorCode MatGetFactor_seqsell_superlu(Mat A,MatFactorType ftype,Mat *F)
6812366e350SStefano Zampini {
6822366e350SStefano Zampini   Mat_SuperLU    *lu;
6832366e350SStefano Zampini 
6842366e350SStefano Zampini   PetscFunctionBegin;
6859566063dSJacob Faibussowitsch   PetscCall(MatGetFactor_seqaij_superlu(A,ftype,F));
6862366e350SStefano Zampini   lu   = (Mat_SuperLU*)((*F)->data);
6872366e350SStefano Zampini   lu->needconversion = PETSC_TRUE;
6882366e350SStefano Zampini   PetscFunctionReturn(0);
6892366e350SStefano Zampini }
6902366e350SStefano Zampini 
6913ca39a21SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU(void)
69242c9c57cSBarry Smith {
69342c9c57cSBarry Smith   PetscFunctionBegin;
6949566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_seqaij_superlu));
6959566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQAIJ,MAT_FACTOR_ILU,MatGetFactor_seqaij_superlu));
6969566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_seqsell_superlu));
6979566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQSELL,MAT_FACTOR_ILU,MatGetFactor_seqsell_superlu));
69842c9c57cSBarry Smith   PetscFunctionReturn(0);
69942c9c57cSBarry Smith }
700