xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 48a46eb9bd028bec07ec0f396b1a3abb43f14558)
114b396bbSKris Buschelman 
25a46d3faSBarry Smith /*  --------------------------------------------------------------------
35a46d3faSBarry Smith 
45a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
5c2b89b5dSBarry Smith      the SuperLU sparse solver.
614b396bbSKris Buschelman */
714b396bbSKris Buschelman 
85a46d3faSBarry Smith /*
95a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
105a46d3faSBarry Smith */
11c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
1214b396bbSKris Buschelman 
135a46d3faSBarry Smith /*
145a46d3faSBarry Smith      SuperLU include files
155a46d3faSBarry Smith */
1614b396bbSKris Buschelman EXTERN_C_BEGIN
1714b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
183cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
193cb270beSHong Zhang #include <slu_cdefs.h>
203cb270beSHong Zhang #else
21c6db04a5SJed Brown #include <slu_zdefs.h>
223cb270beSHong Zhang #endif
233cb270beSHong Zhang #else
243cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
253cb270beSHong Zhang #include <slu_sdefs.h>
2614b396bbSKris Buschelman #else
27c6db04a5SJed Brown #include <slu_ddefs.h>
2814b396bbSKris Buschelman #endif
293cb270beSHong Zhang #endif
30c6db04a5SJed Brown #include <slu_util.h>
3114b396bbSKris Buschelman EXTERN_C_END
3214b396bbSKris Buschelman 
335a46d3faSBarry Smith /*
34245fece9SBarry Smith      This is the data that defines the SuperLU factored matrix type
355a46d3faSBarry Smith */
3614b396bbSKris Buschelman typedef struct {
3775af56d4SHong Zhang   SuperMatrix       A, L, U, B, X;
3875af56d4SHong Zhang   superlu_options_t options;
39da7a1d00SHong Zhang   PetscInt         *perm_c; /* column permutation vector */
40da7a1d00SHong Zhang   PetscInt         *perm_r; /* row permutations from partial pivoting */
41da7a1d00SHong Zhang   PetscInt         *etree;
42da7a1d00SHong Zhang   PetscReal        *R, *C;
4375af56d4SHong Zhang   char              equed[1];
44da7a1d00SHong Zhang   PetscInt          lwork;
4575af56d4SHong Zhang   void             *work;
46da7a1d00SHong Zhang   PetscReal         rpg, rcond;
4775af56d4SHong Zhang   mem_usage_t       mem_usage;
4875af56d4SHong Zhang   MatStructure      flg;
495d8b2efaSHong Zhang   SuperLUStat_t     stat;
50cae5a23dSHong Zhang   Mat               A_dup;
51cae5a23dSHong Zhang   PetscScalar      *rhs_dup;
52c147c726SHong Zhang   GlobalLU_t        Glu;
532366e350SStefano Zampini   PetscBool         needconversion;
5414b396bbSKris Buschelman 
5514b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
56ace3abfcSBarry Smith   PetscBool CleanUpSuperLU;
57f0c56d0fSKris Buschelman } Mat_SuperLU;
5814b396bbSKris Buschelman 
595a46d3faSBarry Smith /*
605a46d3faSBarry Smith     Utility function
615a46d3faSBarry Smith */
629371c9d4SSatish Balay static PetscErrorCode MatView_Info_SuperLU(Mat A, PetscViewer viewer) {
63245fece9SBarry Smith   Mat_SuperLU      *lu = (Mat_SuperLU *)A->data;
645a46d3faSBarry Smith   superlu_options_t options;
655a46d3faSBarry Smith 
665a46d3faSBarry Smith   PetscFunctionBegin;
675a46d3faSBarry Smith   options = lu->options;
682205254eSKarl Rupp 
699566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU run parameters:\n"));
709566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  Equil: %s\n", (options.Equil != NO) ? "YES" : "NO"));
719566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  ColPerm: %" PetscInt_FMT "\n", options.ColPerm));
729566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  IterRefine: %" PetscInt_FMT "\n", options.IterRefine));
739566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  SymmetricMode: %s\n", (options.SymmetricMode != NO) ? "YES" : "NO"));
749566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  DiagPivotThresh: %g\n", options.DiagPivotThresh));
759566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  PivotGrowth: %s\n", (options.PivotGrowth != NO) ? "YES" : "NO"));
769566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  ConditionNumber: %s\n", (options.ConditionNumber != NO) ? "YES" : "NO"));
779566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  RowPerm: %" PetscInt_FMT "\n", options.RowPerm));
789566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  ReplaceTinyPivot: %s\n", (options.ReplaceTinyPivot != NO) ? "YES" : "NO"));
799566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  PrintStat: %s\n", (options.PrintStat != NO) ? "YES" : "NO"));
809566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  lwork: %" PetscInt_FMT "\n", lu->lwork));
81d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_ILU) {
829566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_DropTol: %g\n", options.ILU_DropTol));
839566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_FillTol: %g\n", options.ILU_FillTol));
849566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_FillFactor: %g\n", options.ILU_FillFactor));
859566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_DropRule: %" PetscInt_FMT "\n", options.ILU_DropRule));
869566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_Norm: %" PetscInt_FMT "\n", options.ILU_Norm));
879566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_MILU: %" PetscInt_FMT "\n", options.ILU_MILU));
88cffbb591SHong Zhang   }
895a46d3faSBarry Smith   PetscFunctionReturn(0);
905a46d3faSBarry Smith }
915a46d3faSBarry Smith 
929371c9d4SSatish Balay PetscErrorCode MatSolve_SuperLU_Private(Mat A, Vec b, Vec x) {
93245fece9SBarry Smith   Mat_SuperLU       *lu = (Mat_SuperLU *)A->data;
94245fece9SBarry Smith   const PetscScalar *barray;
95245fece9SBarry Smith   PetscScalar       *xarray;
96245fece9SBarry Smith   PetscInt           info, i, n;
97245fece9SBarry Smith   PetscReal          ferr, berr;
98245fece9SBarry Smith   static PetscBool   cite = PETSC_FALSE;
99245fece9SBarry Smith 
100245fece9SBarry Smith   PetscFunctionBegin;
101245fece9SBarry Smith   if (lu->lwork == -1) PetscFunctionReturn(0);
1029371c9d4SSatish Balay   PetscCall(PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial "
1039371c9d4SSatish Balay                                    "pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",
1049371c9d4SSatish Balay                                    &cite));
105245fece9SBarry Smith 
1069566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(x, &n));
107245fece9SBarry Smith   lu->B.ncol = 1; /* Set the number of right-hand side */
108245fece9SBarry Smith   if (lu->options.Equil && !lu->rhs_dup) {
109245fece9SBarry Smith     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
1109566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &lu->rhs_dup));
111245fece9SBarry Smith   }
112245fece9SBarry Smith   if (lu->options.Equil) {
113245fece9SBarry Smith     /* Copy b into rsh_dup */
1149566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(b, &barray));
1159566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(lu->rhs_dup, barray, n));
1169566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(b, &barray));
117245fece9SBarry Smith     barray = lu->rhs_dup;
118245fece9SBarry Smith   } else {
1199566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(b, &barray));
120245fece9SBarry Smith   }
1219566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x, &xarray));
122245fece9SBarry Smith 
123245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
124245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
125245fece9SBarry Smith   ((DNformat *)lu->B.Store)->nzval = (singlecomplex *)barray;
126245fece9SBarry Smith   ((DNformat *)lu->X.Store)->nzval = (singlecomplex *)xarray;
127245fece9SBarry Smith #else
128245fece9SBarry Smith   ((DNformat *)lu->B.Store)->nzval = (doublecomplex *)barray;
129245fece9SBarry Smith   ((DNformat *)lu->X.Store)->nzval = (doublecomplex *)xarray;
130245fece9SBarry Smith #endif
131245fece9SBarry Smith #else
132245fece9SBarry Smith   ((DNformat *)lu->B.Store)->nzval = (void *)barray;
133245fece9SBarry Smith   ((DNformat *)lu->X.Store)->nzval = xarray;
134245fece9SBarry Smith #endif
135245fece9SBarry Smith 
136245fece9SBarry Smith   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
137245fece9SBarry Smith   if (A->factortype == MAT_FACTOR_LU) {
138245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
139245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
1409371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
141245fece9SBarry Smith #else
1429371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
143245fece9SBarry Smith #endif
144245fece9SBarry Smith #else
145245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
1469371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
147245fece9SBarry Smith #else
1489371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
149245fece9SBarry Smith #endif
150245fece9SBarry Smith #endif
151245fece9SBarry Smith   } else if (A->factortype == MAT_FACTOR_ILU) {
152245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
153245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
1549371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
155245fece9SBarry Smith #else
1569371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
157245fece9SBarry Smith #endif
158245fece9SBarry Smith #else
159245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
1609371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
161245fece9SBarry Smith #else
1629371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
163245fece9SBarry Smith #endif
164245fece9SBarry Smith #endif
165245fece9SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
166*48a46eb9SPierre Jolivet   if (!lu->options.Equil) PetscCall(VecRestoreArrayRead(b, &barray));
1679566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x, &xarray));
168245fece9SBarry Smith 
169245fece9SBarry Smith   if (!info || info == lu->A.ncol + 1) {
170245fece9SBarry Smith     if (lu->options.IterRefine) {
1719566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "Iterative Refinement:\n"));
1729566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"));
1739566063dSJacob Faibussowitsch       for (i = 0; i < 1; ++i) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  %8d%8d%16e%16e\n", i + 1, lu->stat.RefineSteps, ferr, berr));
174245fece9SBarry Smith     }
175245fece9SBarry Smith   } else if (info > 0) {
176245fece9SBarry Smith     if (lu->lwork == -1) {
1779566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  ** Estimated memory: %" PetscInt_FMT " bytes\n", info - lu->A.ncol));
178245fece9SBarry Smith     } else {
1799566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Warning: gssvx() returns info %" PetscInt_FMT "\n", info));
180245fece9SBarry Smith     }
1815f80ce2aSJacob Faibussowitsch   } else PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", info, -info);
182245fece9SBarry Smith 
183245fece9SBarry Smith   if (lu->options.PrintStat) {
1849566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "MatSolve__SuperLU():\n"));
185e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
186245fece9SBarry Smith   }
187245fece9SBarry Smith   PetscFunctionReturn(0);
188245fece9SBarry Smith }
189245fece9SBarry Smith 
1909371c9d4SSatish Balay PetscErrorCode MatSolve_SuperLU(Mat A, Vec b, Vec x) {
191245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
192245fece9SBarry Smith 
193245fece9SBarry Smith   PetscFunctionBegin;
194603e8f96SBarry Smith   if (A->factorerrortype) {
1959566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n"));
1969566063dSJacob Faibussowitsch     PetscCall(VecSetInf(x));
197245fece9SBarry Smith     PetscFunctionReturn(0);
198245fece9SBarry Smith   }
199245fece9SBarry Smith 
200245fece9SBarry Smith   lu->options.Trans = TRANS;
2019566063dSJacob Faibussowitsch   PetscCall(MatSolve_SuperLU_Private(A, b, x));
202245fece9SBarry Smith   PetscFunctionReturn(0);
203245fece9SBarry Smith }
204245fece9SBarry Smith 
2059371c9d4SSatish Balay PetscErrorCode MatSolveTranspose_SuperLU(Mat A, Vec b, Vec x) {
206245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
207245fece9SBarry Smith 
208245fece9SBarry Smith   PetscFunctionBegin;
209603e8f96SBarry Smith   if (A->factorerrortype) {
2109566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n"));
2119566063dSJacob Faibussowitsch     PetscCall(VecSetInf(x));
212245fece9SBarry Smith     PetscFunctionReturn(0);
213245fece9SBarry Smith   }
214245fece9SBarry Smith 
215245fece9SBarry Smith   lu->options.Trans = NOTRANS;
2169566063dSJacob Faibussowitsch   PetscCall(MatSolve_SuperLU_Private(A, b, x));
217245fece9SBarry Smith   PetscFunctionReturn(0);
218245fece9SBarry Smith }
219245fece9SBarry Smith 
2209371c9d4SSatish Balay static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F, Mat A, const MatFactorInfo *info) {
221245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
222cae5a23dSHong Zhang   Mat_SeqAIJ  *aa;
2235a46d3faSBarry Smith   PetscInt     sinfo;
2245a46d3faSBarry Smith   PetscReal    ferr, berr;
2255a46d3faSBarry Smith   NCformat    *Ustore;
2265a46d3faSBarry Smith   SCformat    *Lstore;
2275a46d3faSBarry Smith 
2285a46d3faSBarry Smith   PetscFunctionBegin;
2295a46d3faSBarry Smith   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
2305a46d3faSBarry Smith     lu->options.Fact = SamePattern;
2315a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
2325a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
2331baa6e33SBarry Smith     if (lu->A_dup) PetscCall(MatCopy_SeqAIJ(A, lu->A_dup, SAME_NONZERO_PATTERN));
2342366e350SStefano Zampini 
2355a46d3faSBarry Smith     if (lu->lwork >= 0) {
236e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
237e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
2385a46d3faSBarry Smith       lu->options.Fact = SamePattern;
2395a46d3faSBarry Smith     }
2405a46d3faSBarry Smith   }
2415a46d3faSBarry Smith 
2425a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
2435a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
2445a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
2452366e350SStefano Zampini   if (lu->A_dup) {
246cae5a23dSHong Zhang     aa = (Mat_SeqAIJ *)(lu->A_dup)->data;
247cae5a23dSHong Zhang   } else {
248cae5a23dSHong Zhang     aa = (Mat_SeqAIJ *)(A)->data;
249cae5a23dSHong Zhang   }
2505a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2513cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
252e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:cCreate_CompCol_Matrix", cCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (singlecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_C, SLU_GE));
2533cb270beSHong Zhang #else
254e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:zCreate_CompCol_Matrix", zCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (doublecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_Z, SLU_GE));
2553cb270beSHong Zhang #endif
2563cb270beSHong Zhang #else
2573cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
258e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:sCreate_CompCol_Matrix", sCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_S, SLU_GE));
2595a46d3faSBarry Smith #else
260e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:dCreate_CompCol_Matrix", dCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_D, SLU_GE));
2615a46d3faSBarry Smith #endif
2623cb270beSHong Zhang #endif
2635a46d3faSBarry Smith 
2645a46d3faSBarry Smith   /* Numerical factorization */
2655a46d3faSBarry Smith   lu->B.ncol = 0; /* Indicate not to solve the system */
266d5f3da31SBarry Smith   if (F->factortype == MAT_FACTOR_LU) {
2675a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2683cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
2699371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2703cb270beSHong Zhang #else
2719371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2723cb270beSHong Zhang #endif
2733cb270beSHong Zhang #else
2743cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
2759371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2765a46d3faSBarry Smith #else
2779371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2785a46d3faSBarry Smith #endif
2793cb270beSHong Zhang #endif
280d5f3da31SBarry Smith   } else if (F->factortype == MAT_FACTOR_ILU) {
281cffbb591SHong Zhang     /* Compute the incomplete factorization, condition number and pivot growth */
282cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
2833cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
2849371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2853cb270beSHong Zhang #else
2869371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2873cb270beSHong Zhang #endif
2883cb270beSHong Zhang #else
2893cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
2909371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
291cffbb591SHong Zhang #else
2929371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
293cffbb591SHong Zhang #endif
2943cb270beSHong Zhang #endif
295f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
2965a46d3faSBarry Smith   if (!sinfo || sinfo == lu->A.ncol + 1) {
297*48a46eb9SPierre Jolivet     if (lu->options.PivotGrowth) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Recip. pivot growth = %e\n", lu->rpg));
298*48a46eb9SPierre Jolivet     if (lu->options.ConditionNumber) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Recip. condition number = %e\n", lu->rcond));
2995a46d3faSBarry Smith   } else if (sinfo > 0) {
300675d1226SHong Zhang     if (A->erroriffailure) {
30198921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %" PetscInt_FMT, sinfo);
302675d1226SHong Zhang     } else {
303675d1226SHong Zhang       if (sinfo <= lu->A.ncol) {
3049371c9d4SSatish Balay         if (lu->options.ILU_FillTol == 0.0) { F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; }
3059566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F, "Number of zero pivots %" PetscInt_FMT ", ILU_FillTol %g\n", sinfo, lu->options.ILU_FillTol));
306675d1226SHong Zhang       } else if (sinfo == lu->A.ncol + 1) {
307675d1226SHong Zhang         /*
308675d1226SHong Zhang          U is nonsingular, but RCOND is less than machine
309675d1226SHong Zhang                       precision, meaning that the matrix is singular to
310675d1226SHong Zhang                       working precision. Nevertheless, the solution and
311675d1226SHong Zhang                       error bounds are computed because there are a number
312675d1226SHong Zhang                       of situations where the computed solution can be more
313675d1226SHong Zhang                       accurate than the value of RCOND would suggest.
314675d1226SHong Zhang          */
3159566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F, "Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %" PetscInt_FMT, sinfo));
316675d1226SHong Zhang       } else { /* sinfo > lu->A.ncol + 1 */
317603e8f96SBarry Smith         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
3189566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %" PetscInt_FMT "\n", sinfo));
319675d1226SHong Zhang       }
320675d1226SHong Zhang     }
32198921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", sinfo, -sinfo);
3225a46d3faSBarry Smith 
3235a46d3faSBarry Smith   if (lu->options.PrintStat) {
3249566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "MatLUFactorNumeric_SuperLU():\n"));
325e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
3265a46d3faSBarry Smith     Lstore = (SCformat *)lu->L.Store;
3275a46d3faSBarry Smith     Ustore = (NCformat *)lu->U.Store;
3289566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in factor L = %" PetscInt_FMT "\n", Lstore->nnz));
3299566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in factor U = %" PetscInt_FMT "\n", Ustore->nnz));
3309566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in L+U = %" PetscInt_FMT "\n", Lstore->nnz + Ustore->nnz - lu->A.ncol));
3319566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  L\\U MB %.3f\ttotal MB needed %.3f\n", lu->mem_usage.for_lu / 1e6, lu->mem_usage.total_needed / 1e6));
3325a46d3faSBarry Smith   }
3335a46d3faSBarry Smith 
3345a46d3faSBarry Smith   lu->flg                = SAME_NONZERO_PATTERN;
3351d5ca7f3SHong Zhang   F->ops->solve          = MatSolve_SuperLU;
3361d5ca7f3SHong Zhang   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
3371aef8b4cSStefano Zampini   F->ops->matsolve       = NULL;
3385a46d3faSBarry Smith   PetscFunctionReturn(0);
3395a46d3faSBarry Smith }
3405a46d3faSBarry Smith 
3419371c9d4SSatish Balay static PetscErrorCode MatDestroy_SuperLU(Mat A) {
342245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
34314b396bbSKris Buschelman 
34414b396bbSKris Buschelman   PetscFunctionBegin;
345245fece9SBarry Smith   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
346e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->A));
347e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->B));
348e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->X));
349e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:StatFree", StatFree(&lu->stat));
3500e742b69SHong Zhang     if (lu->lwork >= 0) {
351e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
352e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
3530e742b69SHong Zhang     }
3540e742b69SHong Zhang   }
3559566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->etree));
3569566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->perm_r));
3579566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->perm_c));
3589566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->R));
3599566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->C));
3609566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->rhs_dup));
3619566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&lu->A_dup));
3629566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
3630e742b69SHong Zhang 
364d954db57SHong Zhang   /* clear composed functions */
3659566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
3669566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluSetILUDropTol_C", NULL));
36714b396bbSKris Buschelman   PetscFunctionReturn(0);
36814b396bbSKris Buschelman }
36914b396bbSKris Buschelman 
3709371c9d4SSatish Balay static PetscErrorCode MatView_SuperLU(Mat A, PetscViewer viewer) {
371ace3abfcSBarry Smith   PetscBool         iascii;
37214b396bbSKris Buschelman   PetscViewerFormat format;
37314b396bbSKris Buschelman 
37414b396bbSKris Buschelman   PetscFunctionBegin;
3759566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
37632077d6dSBarry Smith   if (iascii) {
3779566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
378*48a46eb9SPierre Jolivet     if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU(A, viewer));
37914b396bbSKris Buschelman   }
38014b396bbSKris Buschelman   PetscFunctionReturn(0);
38114b396bbSKris Buschelman }
38214b396bbSKris Buschelman 
3839371c9d4SSatish Balay PetscErrorCode MatMatSolve_SuperLU(Mat A, Mat B, Mat X) {
384245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
385cd723cd1SBarry Smith   PetscBool    flg;
386e0b74bf9SHong Zhang 
387e0b74bf9SHong Zhang   PetscFunctionBegin;
3889566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
3895f80ce2aSJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
3909566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
3915f80ce2aSJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
3922205254eSKarl Rupp   lu->options.Trans = TRANS;
393e0b74bf9SHong Zhang   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatMatSolve_SuperLU() is not implemented yet");
394e0b74bf9SHong Zhang   PetscFunctionReturn(0);
395e0b74bf9SHong Zhang }
396e0b74bf9SHong Zhang 
3979371c9d4SSatish Balay static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) {
398245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)(F->data);
39926cc229bSBarry Smith   PetscInt     indx;
40026cc229bSBarry Smith   PetscBool    flg, set;
40126cc229bSBarry Smith   PetscReal    real_input;
40226cc229bSBarry Smith   const char  *colperm[]    = {"NATURAL", "MMD_ATA", "MMD_AT_PLUS_A", "COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
40326cc229bSBarry Smith   const char  *iterrefine[] = {"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
40426cc229bSBarry Smith   const char  *rowperm[]    = {"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
405b24902e0SBarry Smith 
406b24902e0SBarry Smith   PetscFunctionBegin;
40726cc229bSBarry Smith   /* Set options to F */
40826cc229bSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU Options", "Mat");
40926cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_equil", "Equil", "None", (PetscBool)lu->options.Equil, (PetscBool *)&lu->options.Equil, NULL));
41026cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_colperm", "ColPerm", "None", colperm, 4, colperm[3], &indx, &flg));
41126cc229bSBarry Smith   if (flg) lu->options.ColPerm = (colperm_t)indx;
41226cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_iterrefine", "IterRefine", "None", iterrefine, 4, iterrefine[0], &indx, &flg));
41326cc229bSBarry Smith   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
41426cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_symmetricmode", "SymmetricMode", "None", (PetscBool)lu->options.SymmetricMode, &flg, &set));
41526cc229bSBarry Smith   if (set && flg) lu->options.SymmetricMode = YES;
41626cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_diagpivotthresh", "DiagPivotThresh", "None", lu->options.DiagPivotThresh, &real_input, &flg));
41726cc229bSBarry Smith   if (flg) lu->options.DiagPivotThresh = (double)real_input;
41826cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_pivotgrowth", "PivotGrowth", "None", (PetscBool)lu->options.PivotGrowth, &flg, &set));
41926cc229bSBarry Smith   if (set && flg) lu->options.PivotGrowth = YES;
42026cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_conditionnumber", "ConditionNumber", "None", (PetscBool)lu->options.ConditionNumber, &flg, &set));
42126cc229bSBarry Smith   if (set && flg) lu->options.ConditionNumber = YES;
42226cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_rowperm", "rowperm", "None", rowperm, 2, rowperm[lu->options.RowPerm], &indx, &flg));
42326cc229bSBarry Smith   if (flg) lu->options.RowPerm = (rowperm_t)indx;
42426cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_replacetinypivot", "ReplaceTinyPivot", "None", (PetscBool)lu->options.ReplaceTinyPivot, &flg, &set));
42526cc229bSBarry Smith   if (set && flg) lu->options.ReplaceTinyPivot = YES;
42626cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_printstat", "PrintStat", "None", (PetscBool)lu->options.PrintStat, &flg, &set));
42726cc229bSBarry Smith   if (set && flg) lu->options.PrintStat = YES;
42826cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_lwork", "size of work array in bytes used by factorization", "None", lu->lwork, &lu->lwork, NULL));
42926cc229bSBarry Smith   if (lu->lwork > 0) {
43026cc229bSBarry Smith     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
43126cc229bSBarry Smith     PetscCall(PetscMalloc(lu->lwork, &lu->work));
43226cc229bSBarry Smith   } else if (lu->lwork != 0 && lu->lwork != -1) {
43326cc229bSBarry Smith     PetscCall(PetscPrintf(PETSC_COMM_SELF, "   Warning: lwork %" PetscInt_FMT " is not supported by SUPERLU. The default lwork=0 is used.\n", lu->lwork));
43426cc229bSBarry Smith     lu->lwork = 0;
43526cc229bSBarry Smith   }
43626cc229bSBarry Smith   /* ilu options */
43726cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_droptol", "ILU_DropTol", "None", lu->options.ILU_DropTol, &real_input, &flg));
43826cc229bSBarry Smith   if (flg) lu->options.ILU_DropTol = (double)real_input;
43926cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_filltol", "ILU_FillTol", "None", lu->options.ILU_FillTol, &real_input, &flg));
44026cc229bSBarry Smith   if (flg) lu->options.ILU_FillTol = (double)real_input;
44126cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_fillfactor", "ILU_FillFactor", "None", lu->options.ILU_FillFactor, &real_input, &flg));
44226cc229bSBarry Smith   if (flg) lu->options.ILU_FillFactor = (double)real_input;
44326cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_droprull", "ILU_DropRule", "None", lu->options.ILU_DropRule, &lu->options.ILU_DropRule, NULL));
44426cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_norm", "ILU_Norm", "None", lu->options.ILU_Norm, &indx, &flg));
44526cc229bSBarry Smith   if (flg) lu->options.ILU_Norm = (norm_t)indx;
44626cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_milu", "ILU_MILU", "None", lu->options.ILU_MILU, &indx, &flg));
44726cc229bSBarry Smith   if (flg) lu->options.ILU_MILU = (milu_t)indx;
44826cc229bSBarry Smith   PetscOptionsEnd();
44926cc229bSBarry Smith 
450b24902e0SBarry Smith   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
451b24902e0SBarry Smith   lu->CleanUpSuperLU      = PETSC_TRUE;
4521d5ca7f3SHong Zhang   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
4532366e350SStefano Zampini 
4542366e350SStefano Zampini   /* if we are here, the nonzero pattern has changed unless the user explicitly called MatLUFactorSymbolic */
4559566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&lu->A_dup));
456*48a46eb9SPierre Jolivet   if (lu->needconversion) PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &lu->A_dup));
4572366e350SStefano Zampini   if (lu->options.Equil == YES && !lu->A_dup) { /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
4589566063dSJacob Faibussowitsch     PetscCall(MatDuplicate_SeqAIJ(A, MAT_COPY_VALUES, &lu->A_dup));
4592366e350SStefano Zampini   }
460b24902e0SBarry Smith   PetscFunctionReturn(0);
461b24902e0SBarry Smith }
462b24902e0SBarry Smith 
4639371c9d4SSatish Balay static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F, PetscReal dtol) {
464245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
4655ccb76cbSHong Zhang 
4665ccb76cbSHong Zhang   PetscFunctionBegin;
4675ccb76cbSHong Zhang   lu->options.ILU_DropTol = dtol;
4685ccb76cbSHong Zhang   PetscFunctionReturn(0);
4695ccb76cbSHong Zhang }
4705ccb76cbSHong Zhang 
4715ccb76cbSHong Zhang /*@
4725ccb76cbSHong Zhang   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
473147403d9SBarry Smith 
4745ccb76cbSHong Zhang    Logically Collective on Mat
4755ccb76cbSHong Zhang 
4765ccb76cbSHong Zhang    Input Parameters:
4775ccb76cbSHong Zhang +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
4785ccb76cbSHong Zhang -  dtol - drop tolerance
4795ccb76cbSHong Zhang 
4805ccb76cbSHong Zhang   Options Database:
481147403d9SBarry Smith .   -mat_superlu_ilu_droptol <dtol> - the drop tolerance
4825ccb76cbSHong Zhang 
4835ccb76cbSHong Zhang    Level: beginner
4845ccb76cbSHong Zhang 
48596a0c994SBarry Smith    References:
486606c0280SSatish Balay .  * - SuperLU Users' Guide
4875ccb76cbSHong Zhang 
488db781477SPatrick Sanan .seealso: `MatGetFactor()`
4895ccb76cbSHong Zhang @*/
4909371c9d4SSatish Balay PetscErrorCode MatSuperluSetILUDropTol(Mat F, PetscReal dtol) {
4915ccb76cbSHong Zhang   PetscFunctionBegin;
4925ccb76cbSHong Zhang   PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
49369fbec6eSBarry Smith   PetscValidLogicalCollectiveReal(F, dtol, 2);
494cac4c232SBarry Smith   PetscTryMethod(F, "MatSuperluSetILUDropTol_C", (Mat, PetscReal), (F, dtol));
4955ccb76cbSHong Zhang   PetscFunctionReturn(0);
4965ccb76cbSHong Zhang }
4975ccb76cbSHong Zhang 
4989371c9d4SSatish Balay PetscErrorCode MatFactorGetSolverType_seqaij_superlu(Mat A, MatSolverType *type) {
49935bd34faSBarry Smith   PetscFunctionBegin;
5002692d6eeSBarry Smith   *type = MATSOLVERSUPERLU;
50135bd34faSBarry Smith   PetscFunctionReturn(0);
50235bd34faSBarry Smith }
50335bd34faSBarry Smith 
504b24902e0SBarry Smith /*MC
505ba992d64SSatish Balay   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
506b24902e0SBarry Smith   via the external package SuperLU.
507b24902e0SBarry Smith 
508e2e64c6bSBarry Smith   Use ./configure --download-superlu to have PETSc installed with SuperLU
509b24902e0SBarry Smith 
5103ca39a21SBarry Smith   Use -pc_type lu -pc_factor_mat_solver_type superlu to use this direct solver
511c2b89b5dSBarry Smith 
512b24902e0SBarry Smith   Options Database Keys:
513e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE>            - Equil (None)
514e08999f5SMatthew G Knepley . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
515e08999f5SMatthew G Knepley . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
516e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
517e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
518e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
519e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
520e08999f5SMatthew G Knepley . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
521e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
522e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE>        - PrintStat (None)
523e08999f5SMatthew G Knepley . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
524e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
525e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
526e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
527e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
528e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
529e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
530b24902e0SBarry Smith 
53195452b02SPatrick Sanan    Notes:
53295452b02SPatrick Sanan     Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
5335c9eb25fSBarry Smith 
5342c7c0729SBarry Smith     Cannot currently use ordering provided by PETSc.
5352c7c0729SBarry Smith 
536b24902e0SBarry Smith    Level: beginner
537b24902e0SBarry Smith 
538db781477SPatrick Sanan .seealso: `PCLU`, `PCILU`, `MATSOLVERSUPERLU_DIST`, `MATSOLVERMUMPS`, `PCFactorSetMatSolverType()`, `MatSolverType`
539b24902e0SBarry Smith M*/
540b24902e0SBarry Smith 
5419371c9d4SSatish Balay static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A, MatFactorType ftype, Mat *F) {
54214b396bbSKris Buschelman   Mat          B;
543f0c56d0fSKris Buschelman   Mat_SuperLU *lu;
54426cc229bSBarry Smith   PetscInt     m = A->rmap->n, n = A->cmap->n;
54514b396bbSKris Buschelman 
54614b396bbSKris Buschelman   PetscFunctionBegin;
5479566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
5489566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DETERMINE, PETSC_DETERMINE));
5499566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("superlu", &((PetscObject)B)->type_name));
5509566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
55166e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
552cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
553b24902e0SBarry Smith     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
554cffbb591SHong Zhang     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
555b3a44c85SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
556cffbb591SHong Zhang 
5579566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
5589566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSUPERLU, &B->solvertype));
55900c67f3bSHong Zhang 
560b9c12af5SBarry Smith   B->ops->getinfo = MatGetInfo_External;
561b24902e0SBarry Smith   B->ops->destroy = MatDestroy_SuperLU;
5623519fcdcSHong Zhang   B->ops->view    = MatView_SuperLU;
563d5f3da31SBarry Smith   B->factortype   = ftype;
56494b7f48cSBarry Smith   B->assembled    = PETSC_TRUE; /* required by -ksp_view */
5655c9eb25fSBarry Smith   B->preallocated = PETSC_TRUE;
56614b396bbSKris Buschelman 
5679566063dSJacob Faibussowitsch   PetscCall(PetscNewLog(B, &lu));
568cae5a23dSHong Zhang 
569cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU) {
5709ce50919SHong Zhang     set_default_options(&lu->options);
5713d6581fbSHong Zhang     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
5723d6581fbSHong Zhang       "Whether or not the system will be equilibrated depends on the
5733d6581fbSHong Zhang        scaling of the matrix A, but if equilibration is used, A is
5743d6581fbSHong Zhang        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
5753d6581fbSHong Zhang        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
5763d6581fbSHong Zhang      We set 'options.Equil = NO' as default because additional space is needed for it.
5773d6581fbSHong Zhang     */
5783d6581fbSHong Zhang     lu->options.Equil = NO;
579cffbb591SHong Zhang   } else if (ftype == MAT_FACTOR_ILU) {
5800c74a584SJed Brown     /* Set the default input options of ilu: */
581e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:ilu_set_default_options", ilu_set_default_options(&lu->options));
582cffbb591SHong Zhang   }
5839ce50919SHong Zhang   lu->options.PrintStat = NO;
5841a2e2f44SHong Zhang 
5855d8b2efaSHong Zhang   /* Initialize the statistics variables. */
586e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:StatInit", StatInit(&lu->stat));
5878914a3f7SHong Zhang   lu->lwork = 0; /* allocate space internally by system malloc */
5889ce50919SHong Zhang 
5895d8b2efaSHong Zhang   /* Allocate spaces (notice sizes are for the transpose) */
5909566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &lu->etree));
5919566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lu->perm_r));
5929566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &lu->perm_c));
5939566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lu->R));
5949566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &lu->C));
5955d8b2efaSHong Zhang 
5965d8b2efaSHong Zhang   /* create rhs and solution x without allocate space for .Store */
5975d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX)
5983cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
599e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
600e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
6013cb270beSHong Zhang #else
602e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
603e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
6043cb270beSHong Zhang #endif
6053cb270beSHong Zhang #else
6063cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
607e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
608e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
6095d8b2efaSHong Zhang #else
610e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
611e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
6125d8b2efaSHong Zhang #endif
6133cb270beSHong Zhang #endif
6145d8b2efaSHong Zhang 
6159566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_seqaij_superlu));
6169566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluSetILUDropTol_C", MatSuperluSetILUDropTol_SuperLU));
617245fece9SBarry Smith   B->data = lu;
61820be8e61SHong Zhang 
619899d7b4fSKris Buschelman   *F = B;
62014b396bbSKris Buschelman   PetscFunctionReturn(0);
62114b396bbSKris Buschelman }
622d954db57SHong Zhang 
6239371c9d4SSatish Balay static PetscErrorCode MatGetFactor_seqsell_superlu(Mat A, MatFactorType ftype, Mat *F) {
6242366e350SStefano Zampini   Mat_SuperLU *lu;
6252366e350SStefano Zampini 
6262366e350SStefano Zampini   PetscFunctionBegin;
6279566063dSJacob Faibussowitsch   PetscCall(MatGetFactor_seqaij_superlu(A, ftype, F));
6282366e350SStefano Zampini   lu                 = (Mat_SuperLU *)((*F)->data);
6292366e350SStefano Zampini   lu->needconversion = PETSC_TRUE;
6302366e350SStefano Zampini   PetscFunctionReturn(0);
6312366e350SStefano Zampini }
6322366e350SStefano Zampini 
6339371c9d4SSatish Balay PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU(void) {
63442c9c57cSBarry Smith   PetscFunctionBegin;
6359566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_seqaij_superlu));
6369566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_ILU, MatGetFactor_seqaij_superlu));
6379566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_seqsell_superlu));
6389566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_ILU, MatGetFactor_seqsell_superlu));
63942c9c57cSBarry Smith   PetscFunctionReturn(0);
64042c9c57cSBarry Smith }
641