xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 3ba1676111f5c958fe6c2729b46ca4d523958bb3)
114b396bbSKris Buschelman 
25a46d3faSBarry Smith /*  --------------------------------------------------------------------
35a46d3faSBarry Smith 
45a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
5c2b89b5dSBarry Smith      the SuperLU sparse solver.
614b396bbSKris Buschelman */
714b396bbSKris Buschelman 
85a46d3faSBarry Smith /*
95a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
105a46d3faSBarry Smith */
11c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/
1214b396bbSKris Buschelman 
135a46d3faSBarry Smith /*
145a46d3faSBarry Smith      SuperLU include files
155a46d3faSBarry Smith */
1614b396bbSKris Buschelman EXTERN_C_BEGIN
1714b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
183cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
193cb270beSHong Zhang     #include <slu_cdefs.h>
203cb270beSHong Zhang   #else
21c6db04a5SJed Brown     #include <slu_zdefs.h>
223cb270beSHong Zhang   #endif
233cb270beSHong Zhang #else
243cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
253cb270beSHong Zhang     #include <slu_sdefs.h>
2614b396bbSKris Buschelman   #else
27c6db04a5SJed Brown     #include <slu_ddefs.h>
2814b396bbSKris Buschelman   #endif
293cb270beSHong Zhang #endif
30c6db04a5SJed Brown #include <slu_util.h>
3114b396bbSKris Buschelman EXTERN_C_END
3214b396bbSKris Buschelman 
335a46d3faSBarry Smith /*
34245fece9SBarry Smith      This is the data that defines the SuperLU factored matrix type
355a46d3faSBarry Smith */
3614b396bbSKris Buschelman typedef struct {
3775af56d4SHong Zhang   SuperMatrix       A, L, U, B, X;
3875af56d4SHong Zhang   superlu_options_t options;
39da7a1d00SHong Zhang   PetscInt         *perm_c; /* column permutation vector */
40da7a1d00SHong Zhang   PetscInt         *perm_r; /* row permutations from partial pivoting */
41da7a1d00SHong Zhang   PetscInt         *etree;
42da7a1d00SHong Zhang   PetscReal        *R, *C;
4375af56d4SHong Zhang   char              equed[1];
44da7a1d00SHong Zhang   PetscInt          lwork;
4575af56d4SHong Zhang   void             *work;
46da7a1d00SHong Zhang   PetscReal         rpg, rcond;
4775af56d4SHong Zhang   mem_usage_t       mem_usage;
4875af56d4SHong Zhang   MatStructure      flg;
495d8b2efaSHong Zhang   SuperLUStat_t     stat;
50cae5a23dSHong Zhang   Mat               A_dup;
51cae5a23dSHong Zhang   PetscScalar      *rhs_dup;
52c147c726SHong Zhang   GlobalLU_t        Glu;
532366e350SStefano Zampini   PetscBool         needconversion;
5414b396bbSKris Buschelman 
5514b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
56ace3abfcSBarry Smith   PetscBool CleanUpSuperLU;
57f0c56d0fSKris Buschelman } Mat_SuperLU;
5814b396bbSKris Buschelman 
595a46d3faSBarry Smith /*
605a46d3faSBarry Smith     Utility function
615a46d3faSBarry Smith */
62d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_Info_SuperLU(Mat A, PetscViewer viewer)
63d71ae5a4SJacob Faibussowitsch {
64245fece9SBarry Smith   Mat_SuperLU      *lu = (Mat_SuperLU *)A->data;
655a46d3faSBarry Smith   superlu_options_t options;
665a46d3faSBarry Smith 
675a46d3faSBarry Smith   PetscFunctionBegin;
685a46d3faSBarry Smith   options = lu->options;
692205254eSKarl Rupp 
709566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU run parameters:\n"));
719566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  Equil: %s\n", (options.Equil != NO) ? "YES" : "NO"));
729566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  ColPerm: %" PetscInt_FMT "\n", options.ColPerm));
739566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  IterRefine: %" PetscInt_FMT "\n", options.IterRefine));
749566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  SymmetricMode: %s\n", (options.SymmetricMode != NO) ? "YES" : "NO"));
759566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  DiagPivotThresh: %g\n", options.DiagPivotThresh));
769566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  PivotGrowth: %s\n", (options.PivotGrowth != NO) ? "YES" : "NO"));
779566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  ConditionNumber: %s\n", (options.ConditionNumber != NO) ? "YES" : "NO"));
789566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  RowPerm: %" PetscInt_FMT "\n", options.RowPerm));
799566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  ReplaceTinyPivot: %s\n", (options.ReplaceTinyPivot != NO) ? "YES" : "NO"));
809566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  PrintStat: %s\n", (options.PrintStat != NO) ? "YES" : "NO"));
819566063dSJacob Faibussowitsch   PetscCall(PetscViewerASCIIPrintf(viewer, "  lwork: %" PetscInt_FMT "\n", lu->lwork));
82d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_ILU) {
839566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_DropTol: %g\n", options.ILU_DropTol));
849566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_FillTol: %g\n", options.ILU_FillTol));
859566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_FillFactor: %g\n", options.ILU_FillFactor));
869566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_DropRule: %" PetscInt_FMT "\n", options.ILU_DropRule));
879566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_Norm: %" PetscInt_FMT "\n", options.ILU_Norm));
889566063dSJacob Faibussowitsch     PetscCall(PetscViewerASCIIPrintf(viewer, "  ILU_MILU: %" PetscInt_FMT "\n", options.ILU_MILU));
89cffbb591SHong Zhang   }
90*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
915a46d3faSBarry Smith }
925a46d3faSBarry Smith 
93d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SuperLU_Private(Mat A, Vec b, Vec x)
94d71ae5a4SJacob Faibussowitsch {
95245fece9SBarry Smith   Mat_SuperLU       *lu = (Mat_SuperLU *)A->data;
96245fece9SBarry Smith   const PetscScalar *barray;
97245fece9SBarry Smith   PetscScalar       *xarray;
98245fece9SBarry Smith   PetscInt           info, i, n;
99245fece9SBarry Smith   PetscReal          ferr, berr;
100245fece9SBarry Smith   static PetscBool   cite = PETSC_FALSE;
101245fece9SBarry Smith 
102245fece9SBarry Smith   PetscFunctionBegin;
103*3ba16761SJacob Faibussowitsch   if (lu->lwork == -1) PetscFunctionReturn(PETSC_SUCCESS);
1049371c9d4SSatish Balay   PetscCall(PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial "
1059371c9d4SSatish Balay                                    "pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",
1069371c9d4SSatish Balay                                    &cite));
107245fece9SBarry Smith 
1089566063dSJacob Faibussowitsch   PetscCall(VecGetLocalSize(x, &n));
109245fece9SBarry Smith   lu->B.ncol = 1; /* Set the number of right-hand side */
110245fece9SBarry Smith   if (lu->options.Equil && !lu->rhs_dup) {
111245fece9SBarry Smith     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
1129566063dSJacob Faibussowitsch     PetscCall(PetscMalloc1(n, &lu->rhs_dup));
113245fece9SBarry Smith   }
114245fece9SBarry Smith   if (lu->options.Equil) {
115245fece9SBarry Smith     /* Copy b into rsh_dup */
1169566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(b, &barray));
1179566063dSJacob Faibussowitsch     PetscCall(PetscArraycpy(lu->rhs_dup, barray, n));
1189566063dSJacob Faibussowitsch     PetscCall(VecRestoreArrayRead(b, &barray));
119245fece9SBarry Smith     barray = lu->rhs_dup;
120245fece9SBarry Smith   } else {
1219566063dSJacob Faibussowitsch     PetscCall(VecGetArrayRead(b, &barray));
122245fece9SBarry Smith   }
1239566063dSJacob Faibussowitsch   PetscCall(VecGetArray(x, &xarray));
124245fece9SBarry Smith 
125245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
126245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
127245fece9SBarry Smith   ((DNformat *)lu->B.Store)->nzval = (singlecomplex *)barray;
128245fece9SBarry Smith   ((DNformat *)lu->X.Store)->nzval = (singlecomplex *)xarray;
129245fece9SBarry Smith   #else
130245fece9SBarry Smith   ((DNformat *)lu->B.Store)->nzval = (doublecomplex *)barray;
131245fece9SBarry Smith   ((DNformat *)lu->X.Store)->nzval = (doublecomplex *)xarray;
132245fece9SBarry Smith   #endif
133245fece9SBarry Smith #else
134245fece9SBarry Smith   ((DNformat *)lu->B.Store)->nzval = (void *)barray;
135245fece9SBarry Smith   ((DNformat *)lu->X.Store)->nzval = xarray;
136245fece9SBarry Smith #endif
137245fece9SBarry Smith 
138245fece9SBarry Smith   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
139245fece9SBarry Smith   if (A->factortype == MAT_FACTOR_LU) {
140245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
141245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
1429371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
143245fece9SBarry Smith   #else
1449371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
145245fece9SBarry Smith   #endif
146245fece9SBarry Smith #else
147245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
1489371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
149245fece9SBarry Smith   #else
1509371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
151245fece9SBarry Smith   #endif
152245fece9SBarry Smith #endif
153245fece9SBarry Smith   } else if (A->factortype == MAT_FACTOR_ILU) {
154245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
155245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
1569371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
157245fece9SBarry Smith   #else
1589371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
159245fece9SBarry Smith   #endif
160245fece9SBarry Smith #else
161245fece9SBarry Smith   #if defined(PETSC_USE_REAL_SINGLE)
1629371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
163245fece9SBarry Smith   #else
1649371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &info));
165245fece9SBarry Smith   #endif
166245fece9SBarry Smith #endif
167245fece9SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
16848a46eb9SPierre Jolivet   if (!lu->options.Equil) PetscCall(VecRestoreArrayRead(b, &barray));
1699566063dSJacob Faibussowitsch   PetscCall(VecRestoreArray(x, &xarray));
170245fece9SBarry Smith 
171245fece9SBarry Smith   if (!info || info == lu->A.ncol + 1) {
172245fece9SBarry Smith     if (lu->options.IterRefine) {
1739566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "Iterative Refinement:\n"));
1749566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"));
1759566063dSJacob Faibussowitsch       for (i = 0; i < 1; ++i) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  %8d%8d%16e%16e\n", i + 1, lu->stat.RefineSteps, ferr, berr));
176245fece9SBarry Smith     }
177245fece9SBarry Smith   } else if (info > 0) {
178245fece9SBarry Smith     if (lu->lwork == -1) {
1799566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  ** Estimated memory: %" PetscInt_FMT " bytes\n", info - lu->A.ncol));
180245fece9SBarry Smith     } else {
1819566063dSJacob Faibussowitsch       PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Warning: gssvx() returns info %" PetscInt_FMT "\n", info));
182245fece9SBarry Smith     }
1835f80ce2aSJacob Faibussowitsch   } else PetscCheck(info >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", info, -info);
184245fece9SBarry Smith 
185245fece9SBarry Smith   if (lu->options.PrintStat) {
1869566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "MatSolve__SuperLU():\n"));
187e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
188245fece9SBarry Smith   }
189*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
190245fece9SBarry Smith }
191245fece9SBarry Smith 
192d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolve_SuperLU(Mat A, Vec b, Vec x)
193d71ae5a4SJacob Faibussowitsch {
194245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
19578bc9606SHong Zhang   trans_t      oldOption;
196245fece9SBarry Smith 
197245fece9SBarry Smith   PetscFunctionBegin;
198603e8f96SBarry Smith   if (A->factorerrortype) {
1999566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n"));
2009566063dSJacob Faibussowitsch     PetscCall(VecSetInf(x));
201*3ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
202245fece9SBarry Smith   }
203245fece9SBarry Smith 
20478bc9606SHong Zhang   oldOption         = lu->options.Trans;
205245fece9SBarry Smith   lu->options.Trans = TRANS;
2069566063dSJacob Faibussowitsch   PetscCall(MatSolve_SuperLU_Private(A, b, x));
20778bc9606SHong Zhang   lu->options.Trans = oldOption;
208*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
209245fece9SBarry Smith }
210245fece9SBarry Smith 
211d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSolveTranspose_SuperLU(Mat A, Vec b, Vec x)
212d71ae5a4SJacob Faibussowitsch {
213245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
21478bc9606SHong Zhang   trans_t      oldOption;
215245fece9SBarry Smith 
216245fece9SBarry Smith   PetscFunctionBegin;
217603e8f96SBarry Smith   if (A->factorerrortype) {
2189566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, skip\n"));
2199566063dSJacob Faibussowitsch     PetscCall(VecSetInf(x));
220*3ba16761SJacob Faibussowitsch     PetscFunctionReturn(PETSC_SUCCESS);
221245fece9SBarry Smith   }
222245fece9SBarry Smith 
22378bc9606SHong Zhang   oldOption         = lu->options.Trans;
224245fece9SBarry Smith   lu->options.Trans = NOTRANS;
2259566063dSJacob Faibussowitsch   PetscCall(MatSolve_SuperLU_Private(A, b, x));
22678bc9606SHong Zhang   lu->options.Trans = oldOption;
227*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
228245fece9SBarry Smith }
229245fece9SBarry Smith 
230d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F, Mat A, const MatFactorInfo *info)
231d71ae5a4SJacob Faibussowitsch {
232245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
233cae5a23dSHong Zhang   Mat_SeqAIJ  *aa;
2345a46d3faSBarry Smith   PetscInt     sinfo;
2355a46d3faSBarry Smith   PetscReal    ferr, berr;
2365a46d3faSBarry Smith   NCformat    *Ustore;
2375a46d3faSBarry Smith   SCformat    *Lstore;
2385a46d3faSBarry Smith 
2395a46d3faSBarry Smith   PetscFunctionBegin;
2405a46d3faSBarry Smith   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
2415a46d3faSBarry Smith     lu->options.Fact = SamePattern;
2425a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
2435a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
2441baa6e33SBarry Smith     if (lu->A_dup) PetscCall(MatCopy_SeqAIJ(A, lu->A_dup, SAME_NONZERO_PATTERN));
2452366e350SStefano Zampini 
2465a46d3faSBarry Smith     if (lu->lwork >= 0) {
247e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
248e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
2495a46d3faSBarry Smith       lu->options.Fact = SamePattern;
2505a46d3faSBarry Smith     }
2515a46d3faSBarry Smith   }
2525a46d3faSBarry Smith 
2535a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
2545a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
2555a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
2562366e350SStefano Zampini   if (lu->A_dup) {
257cae5a23dSHong Zhang     aa = (Mat_SeqAIJ *)(lu->A_dup)->data;
258cae5a23dSHong Zhang   } else {
259cae5a23dSHong Zhang     aa = (Mat_SeqAIJ *)(A)->data;
260cae5a23dSHong Zhang   }
2615a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2623cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
263e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:cCreate_CompCol_Matrix", cCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (singlecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_C, SLU_GE));
2643cb270beSHong Zhang   #else
265e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:zCreate_CompCol_Matrix", zCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, (doublecomplex *)aa->a, aa->j, aa->i, SLU_NC, SLU_Z, SLU_GE));
2663cb270beSHong Zhang   #endif
2673cb270beSHong Zhang #else
2683cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
269e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:sCreate_CompCol_Matrix", sCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_S, SLU_GE));
2705a46d3faSBarry Smith   #else
271e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:dCreate_CompCol_Matrix", dCreate_CompCol_Matrix(&lu->A, A->cmap->n, A->rmap->n, aa->nz, aa->a, aa->j, aa->i, SLU_NC, SLU_D, SLU_GE));
2725a46d3faSBarry Smith   #endif
2733cb270beSHong Zhang #endif
2745a46d3faSBarry Smith 
2755a46d3faSBarry Smith   /* Numerical factorization */
2765a46d3faSBarry Smith   lu->B.ncol = 0; /* Indicate not to solve the system */
277d5f3da31SBarry Smith   if (F->factortype == MAT_FACTOR_LU) {
2785a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2793cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
2809371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgssvx", cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2813cb270beSHong Zhang   #else
2829371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgssvx", zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2833cb270beSHong Zhang   #endif
2843cb270beSHong Zhang #else
2853cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
2869371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgssvx", sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2875a46d3faSBarry Smith   #else
2889371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgssvx", dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2895a46d3faSBarry Smith   #endif
2903cb270beSHong Zhang #endif
291d5f3da31SBarry Smith   } else if (F->factortype == MAT_FACTOR_ILU) {
292cffbb591SHong Zhang     /* Compute the incomplete factorization, condition number and pivot growth */
293cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
2943cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
2959371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:cgsisx", cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2963cb270beSHong Zhang   #else
2979371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:zgsisx", zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
2983cb270beSHong Zhang   #endif
2993cb270beSHong Zhang #else
3003cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
3019371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:sgsisx", sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
302cffbb591SHong Zhang   #else
3039371c9d4SSatish Balay     PetscStackCallExternalVoid("SuperLU:dgsisx", dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
304cffbb591SHong Zhang   #endif
3053cb270beSHong Zhang #endif
306f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
3075a46d3faSBarry Smith   if (!sinfo || sinfo == lu->A.ncol + 1) {
30848a46eb9SPierre Jolivet     if (lu->options.PivotGrowth) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Recip. pivot growth = %e\n", lu->rpg));
30948a46eb9SPierre Jolivet     if (lu->options.ConditionNumber) PetscCall(PetscPrintf(PETSC_COMM_SELF, "  Recip. condition number = %e\n", lu->rcond));
3105a46d3faSBarry Smith   } else if (sinfo > 0) {
311675d1226SHong Zhang     if (A->erroriffailure) {
31298921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %" PetscInt_FMT, sinfo);
313675d1226SHong Zhang     } else {
314675d1226SHong Zhang       if (sinfo <= lu->A.ncol) {
315ad540459SPierre Jolivet         if (lu->options.ILU_FillTol == 0.0) F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
3169566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F, "Number of zero pivots %" PetscInt_FMT ", ILU_FillTol %g\n", sinfo, lu->options.ILU_FillTol));
317675d1226SHong Zhang       } else if (sinfo == lu->A.ncol + 1) {
318675d1226SHong Zhang         /*
319675d1226SHong Zhang          U is nonsingular, but RCOND is less than machine
320675d1226SHong Zhang                       precision, meaning that the matrix is singular to
321675d1226SHong Zhang                       working precision. Nevertheless, the solution and
322675d1226SHong Zhang                       error bounds are computed because there are a number
323675d1226SHong Zhang                       of situations where the computed solution can be more
324675d1226SHong Zhang                       accurate than the value of RCOND would suggest.
325675d1226SHong Zhang          */
3269566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F, "Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %" PetscInt_FMT, sinfo));
327675d1226SHong Zhang       } else { /* sinfo > lu->A.ncol + 1 */
328603e8f96SBarry Smith         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
3299566063dSJacob Faibussowitsch         PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %" PetscInt_FMT "\n", sinfo));
330675d1226SHong Zhang       }
331675d1226SHong Zhang     }
33298921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", sinfo, -sinfo);
3335a46d3faSBarry Smith 
3345a46d3faSBarry Smith   if (lu->options.PrintStat) {
3359566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "MatLUFactorNumeric_SuperLU():\n"));
336e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:StatPrint", StatPrint(&lu->stat));
3375a46d3faSBarry Smith     Lstore = (SCformat *)lu->L.Store;
3385a46d3faSBarry Smith     Ustore = (NCformat *)lu->U.Store;
3399566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in factor L = %" PetscInt_FMT "\n", Lstore->nnz));
3409566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in factor U = %" PetscInt_FMT "\n", Ustore->nnz));
3419566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  No of nonzeros in L+U = %" PetscInt_FMT "\n", Lstore->nnz + Ustore->nnz - lu->A.ncol));
3429566063dSJacob Faibussowitsch     PetscCall(PetscPrintf(PETSC_COMM_SELF, "  L\\U MB %.3f\ttotal MB needed %.3f\n", lu->mem_usage.for_lu / 1e6, lu->mem_usage.total_needed / 1e6));
3435a46d3faSBarry Smith   }
3445a46d3faSBarry Smith 
3455a46d3faSBarry Smith   lu->flg                = SAME_NONZERO_PATTERN;
3461d5ca7f3SHong Zhang   F->ops->solve          = MatSolve_SuperLU;
3471d5ca7f3SHong Zhang   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
3481aef8b4cSStefano Zampini   F->ops->matsolve       = NULL;
349*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
3505a46d3faSBarry Smith }
3515a46d3faSBarry Smith 
352d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatDestroy_SuperLU(Mat A)
353d71ae5a4SJacob Faibussowitsch {
354245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
35514b396bbSKris Buschelman 
35614b396bbSKris Buschelman   PetscFunctionBegin;
357245fece9SBarry Smith   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
358e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->A));
359e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->B));
360e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:Destroy_SuperMatrix_Store", Destroy_SuperMatrix_Store(&lu->X));
361e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:StatFree", StatFree(&lu->stat));
3620e742b69SHong Zhang     if (lu->lwork >= 0) {
363e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_SuperNode_Matrix", Destroy_SuperNode_Matrix(&lu->L));
364e77caa6dSBarry Smith       PetscStackCallExternalVoid("SuperLU:Destroy_CompCol_Matrix", Destroy_CompCol_Matrix(&lu->U));
3650e742b69SHong Zhang     }
3660e742b69SHong Zhang   }
3679566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->etree));
3689566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->perm_r));
3699566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->perm_c));
3709566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->R));
3719566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->C));
3729566063dSJacob Faibussowitsch   PetscCall(PetscFree(lu->rhs_dup));
3739566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&lu->A_dup));
3749566063dSJacob Faibussowitsch   PetscCall(PetscFree(A->data));
3750e742b69SHong Zhang 
376d954db57SHong Zhang   /* clear composed functions */
3779566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
3789566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluSetILUDropTol_C", NULL));
379*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
38014b396bbSKris Buschelman }
38114b396bbSKris Buschelman 
382d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatView_SuperLU(Mat A, PetscViewer viewer)
383d71ae5a4SJacob Faibussowitsch {
384ace3abfcSBarry Smith   PetscBool         iascii;
38514b396bbSKris Buschelman   PetscViewerFormat format;
38614b396bbSKris Buschelman 
38714b396bbSKris Buschelman   PetscFunctionBegin;
3889566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
38932077d6dSBarry Smith   if (iascii) {
3909566063dSJacob Faibussowitsch     PetscCall(PetscViewerGetFormat(viewer, &format));
39148a46eb9SPierre Jolivet     if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU(A, viewer));
39214b396bbSKris Buschelman   }
393*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
39414b396bbSKris Buschelman }
39514b396bbSKris Buschelman 
396d71ae5a4SJacob Faibussowitsch PetscErrorCode MatMatSolve_SuperLU(Mat A, Mat B, Mat X)
397d71ae5a4SJacob Faibussowitsch {
398245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)A->data;
399cd723cd1SBarry Smith   PetscBool    flg;
400e0b74bf9SHong Zhang 
401e0b74bf9SHong Zhang   PetscFunctionBegin;
4029566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
4035f80ce2aSJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
4049566063dSJacob Faibussowitsch   PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
4055f80ce2aSJacob Faibussowitsch   PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
4062205254eSKarl Rupp   lu->options.Trans = TRANS;
407e0b74bf9SHong Zhang   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "MatMatSolve_SuperLU() is not implemented yet");
408*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
409e0b74bf9SHong Zhang }
410e0b74bf9SHong Zhang 
411d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info)
412d71ae5a4SJacob Faibussowitsch {
413245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)(F->data);
41426cc229bSBarry Smith   PetscInt     indx;
41526cc229bSBarry Smith   PetscBool    flg, set;
41626cc229bSBarry Smith   PetscReal    real_input;
41726cc229bSBarry Smith   const char  *colperm[]    = {"NATURAL", "MMD_ATA", "MMD_AT_PLUS_A", "COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
41826cc229bSBarry Smith   const char  *iterrefine[] = {"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
41926cc229bSBarry Smith   const char  *rowperm[]    = {"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
420b24902e0SBarry Smith 
421b24902e0SBarry Smith   PetscFunctionBegin;
42226cc229bSBarry Smith   /* Set options to F */
42326cc229bSBarry Smith   PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU Options", "Mat");
42426cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_equil", "Equil", "None", (PetscBool)lu->options.Equil, (PetscBool *)&lu->options.Equil, NULL));
42526cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_colperm", "ColPerm", "None", colperm, 4, colperm[3], &indx, &flg));
42626cc229bSBarry Smith   if (flg) lu->options.ColPerm = (colperm_t)indx;
42726cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_iterrefine", "IterRefine", "None", iterrefine, 4, iterrefine[0], &indx, &flg));
42826cc229bSBarry Smith   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
42926cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_symmetricmode", "SymmetricMode", "None", (PetscBool)lu->options.SymmetricMode, &flg, &set));
43026cc229bSBarry Smith   if (set && flg) lu->options.SymmetricMode = YES;
43126cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_diagpivotthresh", "DiagPivotThresh", "None", lu->options.DiagPivotThresh, &real_input, &flg));
43226cc229bSBarry Smith   if (flg) lu->options.DiagPivotThresh = (double)real_input;
43326cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_pivotgrowth", "PivotGrowth", "None", (PetscBool)lu->options.PivotGrowth, &flg, &set));
43426cc229bSBarry Smith   if (set && flg) lu->options.PivotGrowth = YES;
43526cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_conditionnumber", "ConditionNumber", "None", (PetscBool)lu->options.ConditionNumber, &flg, &set));
43626cc229bSBarry Smith   if (set && flg) lu->options.ConditionNumber = YES;
43726cc229bSBarry Smith   PetscCall(PetscOptionsEList("-mat_superlu_rowperm", "rowperm", "None", rowperm, 2, rowperm[lu->options.RowPerm], &indx, &flg));
43826cc229bSBarry Smith   if (flg) lu->options.RowPerm = (rowperm_t)indx;
43926cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_replacetinypivot", "ReplaceTinyPivot", "None", (PetscBool)lu->options.ReplaceTinyPivot, &flg, &set));
44026cc229bSBarry Smith   if (set && flg) lu->options.ReplaceTinyPivot = YES;
44126cc229bSBarry Smith   PetscCall(PetscOptionsBool("-mat_superlu_printstat", "PrintStat", "None", (PetscBool)lu->options.PrintStat, &flg, &set));
44226cc229bSBarry Smith   if (set && flg) lu->options.PrintStat = YES;
44326cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_lwork", "size of work array in bytes used by factorization", "None", lu->lwork, &lu->lwork, NULL));
44426cc229bSBarry Smith   if (lu->lwork > 0) {
44526cc229bSBarry Smith     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
44626cc229bSBarry Smith     PetscCall(PetscMalloc(lu->lwork, &lu->work));
44726cc229bSBarry Smith   } else if (lu->lwork != 0 && lu->lwork != -1) {
44826cc229bSBarry Smith     PetscCall(PetscPrintf(PETSC_COMM_SELF, "   Warning: lwork %" PetscInt_FMT " is not supported by SUPERLU. The default lwork=0 is used.\n", lu->lwork));
44926cc229bSBarry Smith     lu->lwork = 0;
45026cc229bSBarry Smith   }
45126cc229bSBarry Smith   /* ilu options */
45226cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_droptol", "ILU_DropTol", "None", lu->options.ILU_DropTol, &real_input, &flg));
45326cc229bSBarry Smith   if (flg) lu->options.ILU_DropTol = (double)real_input;
45426cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_filltol", "ILU_FillTol", "None", lu->options.ILU_FillTol, &real_input, &flg));
45526cc229bSBarry Smith   if (flg) lu->options.ILU_FillTol = (double)real_input;
45626cc229bSBarry Smith   PetscCall(PetscOptionsReal("-mat_superlu_ilu_fillfactor", "ILU_FillFactor", "None", lu->options.ILU_FillFactor, &real_input, &flg));
45726cc229bSBarry Smith   if (flg) lu->options.ILU_FillFactor = (double)real_input;
45826cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_droprull", "ILU_DropRule", "None", lu->options.ILU_DropRule, &lu->options.ILU_DropRule, NULL));
45926cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_norm", "ILU_Norm", "None", lu->options.ILU_Norm, &indx, &flg));
46026cc229bSBarry Smith   if (flg) lu->options.ILU_Norm = (norm_t)indx;
46126cc229bSBarry Smith   PetscCall(PetscOptionsInt("-mat_superlu_ilu_milu", "ILU_MILU", "None", lu->options.ILU_MILU, &indx, &flg));
46226cc229bSBarry Smith   if (flg) lu->options.ILU_MILU = (milu_t)indx;
46326cc229bSBarry Smith   PetscOptionsEnd();
46426cc229bSBarry Smith 
465b24902e0SBarry Smith   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
466b24902e0SBarry Smith   lu->CleanUpSuperLU      = PETSC_TRUE;
4671d5ca7f3SHong Zhang   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
4682366e350SStefano Zampini 
4692366e350SStefano Zampini   /* if we are here, the nonzero pattern has changed unless the user explicitly called MatLUFactorSymbolic */
4709566063dSJacob Faibussowitsch   PetscCall(MatDestroy(&lu->A_dup));
47148a46eb9SPierre Jolivet   if (lu->needconversion) PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &lu->A_dup));
4722366e350SStefano Zampini   if (lu->options.Equil == YES && !lu->A_dup) { /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
4739566063dSJacob Faibussowitsch     PetscCall(MatDuplicate_SeqAIJ(A, MAT_COPY_VALUES, &lu->A_dup));
4742366e350SStefano Zampini   }
475*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
476b24902e0SBarry Smith }
477b24902e0SBarry Smith 
478d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F, PetscReal dtol)
479d71ae5a4SJacob Faibussowitsch {
480245fece9SBarry Smith   Mat_SuperLU *lu = (Mat_SuperLU *)F->data;
4815ccb76cbSHong Zhang 
4825ccb76cbSHong Zhang   PetscFunctionBegin;
4835ccb76cbSHong Zhang   lu->options.ILU_DropTol = dtol;
484*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
4855ccb76cbSHong Zhang }
4865ccb76cbSHong Zhang 
4875ccb76cbSHong Zhang /*@
4885ccb76cbSHong Zhang   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
489147403d9SBarry Smith 
490c3339decSBarry Smith    Logically Collective
4915ccb76cbSHong Zhang 
4925ccb76cbSHong Zhang    Input Parameters:
49311a5261eSBarry Smith +  F - the factored matrix obtained by calling `MatGetFactor()` from PETSc-SuperLU interface
4945ccb76cbSHong Zhang -  dtol - drop tolerance
4955ccb76cbSHong Zhang 
4963c7db156SBarry Smith   Options Database Key:
497147403d9SBarry Smith .   -mat_superlu_ilu_droptol <dtol> - the drop tolerance
4985ccb76cbSHong Zhang 
4995ccb76cbSHong Zhang    Level: beginner
5005ccb76cbSHong Zhang 
50196a0c994SBarry Smith    References:
502606c0280SSatish Balay .  * - SuperLU Users' Guide
5035ccb76cbSHong Zhang 
504db781477SPatrick Sanan .seealso: `MatGetFactor()`
5055ccb76cbSHong Zhang @*/
506d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSuperluSetILUDropTol(Mat F, PetscReal dtol)
507d71ae5a4SJacob Faibussowitsch {
5085ccb76cbSHong Zhang   PetscFunctionBegin;
5095ccb76cbSHong Zhang   PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
51069fbec6eSBarry Smith   PetscValidLogicalCollectiveReal(F, dtol, 2);
511cac4c232SBarry Smith   PetscTryMethod(F, "MatSuperluSetILUDropTol_C", (Mat, PetscReal), (F, dtol));
512*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
5135ccb76cbSHong Zhang }
5145ccb76cbSHong Zhang 
515d71ae5a4SJacob Faibussowitsch PetscErrorCode MatFactorGetSolverType_seqaij_superlu(Mat A, MatSolverType *type)
516d71ae5a4SJacob Faibussowitsch {
51735bd34faSBarry Smith   PetscFunctionBegin;
5182692d6eeSBarry Smith   *type = MATSOLVERSUPERLU;
519*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
52035bd34faSBarry Smith }
52135bd34faSBarry Smith 
522b24902e0SBarry Smith /*MC
523ba992d64SSatish Balay   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
524b24902e0SBarry Smith   via the external package SuperLU.
525b24902e0SBarry Smith 
526e2e64c6bSBarry Smith   Use ./configure --download-superlu to have PETSc installed with SuperLU
527b24902e0SBarry Smith 
5283ca39a21SBarry Smith   Use -pc_type lu -pc_factor_mat_solver_type superlu to use this direct solver
529c2b89b5dSBarry Smith 
530b24902e0SBarry Smith   Options Database Keys:
531e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE>            - Equil (None)
532e08999f5SMatthew G Knepley . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
533e08999f5SMatthew G Knepley . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
534e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
535e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
536e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
537e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
538e08999f5SMatthew G Knepley . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
539e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
540e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE>        - PrintStat (None)
541e08999f5SMatthew G Knepley . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
542e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
543e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
544e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
545e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
546e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
547e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
548b24902e0SBarry Smith 
54995452b02SPatrick Sanan    Notes:
55011a5261eSBarry Smith     Do not confuse this with `MATSOLVERSUPERLU_DIST` which is for parallel sparse solves
5515c9eb25fSBarry Smith 
55211a5261eSBarry Smith     Cannot use ordering provided by PETSc, provides its own.
5532c7c0729SBarry Smith 
554b24902e0SBarry Smith    Level: beginner
555b24902e0SBarry Smith 
556db781477SPatrick Sanan .seealso: `PCLU`, `PCILU`, `MATSOLVERSUPERLU_DIST`, `MATSOLVERMUMPS`, `PCFactorSetMatSolverType()`, `MatSolverType`
557b24902e0SBarry Smith M*/
558b24902e0SBarry Smith 
559d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A, MatFactorType ftype, Mat *F)
560d71ae5a4SJacob Faibussowitsch {
56114b396bbSKris Buschelman   Mat          B;
562f0c56d0fSKris Buschelman   Mat_SuperLU *lu;
56326cc229bSBarry Smith   PetscInt     m = A->rmap->n, n = A->cmap->n;
56414b396bbSKris Buschelman 
56514b396bbSKris Buschelman   PetscFunctionBegin;
5669566063dSJacob Faibussowitsch   PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
5679566063dSJacob Faibussowitsch   PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, PETSC_DETERMINE, PETSC_DETERMINE));
5689566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy("superlu", &((PetscObject)B)->type_name));
5699566063dSJacob Faibussowitsch   PetscCall(MatSetUp(B));
57066e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
571cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
572b24902e0SBarry Smith     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
573cffbb591SHong Zhang     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
574b3a44c85SBarry Smith   } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor type not supported");
575cffbb591SHong Zhang 
5769566063dSJacob Faibussowitsch   PetscCall(PetscFree(B->solvertype));
5779566063dSJacob Faibussowitsch   PetscCall(PetscStrallocpy(MATSOLVERSUPERLU, &B->solvertype));
57800c67f3bSHong Zhang 
579b9c12af5SBarry Smith   B->ops->getinfo = MatGetInfo_External;
580b24902e0SBarry Smith   B->ops->destroy = MatDestroy_SuperLU;
5813519fcdcSHong Zhang   B->ops->view    = MatView_SuperLU;
582d5f3da31SBarry Smith   B->factortype   = ftype;
58394b7f48cSBarry Smith   B->assembled    = PETSC_TRUE; /* required by -ksp_view */
5845c9eb25fSBarry Smith   B->preallocated = PETSC_TRUE;
58514b396bbSKris Buschelman 
5864dfa11a4SJacob Faibussowitsch   PetscCall(PetscNew(&lu));
587cae5a23dSHong Zhang 
588cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU) {
5899ce50919SHong Zhang     set_default_options(&lu->options);
5903d6581fbSHong Zhang     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
5913d6581fbSHong Zhang       "Whether or not the system will be equilibrated depends on the
5923d6581fbSHong Zhang        scaling of the matrix A, but if equilibration is used, A is
5933d6581fbSHong Zhang        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
5943d6581fbSHong Zhang        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
5953d6581fbSHong Zhang      We set 'options.Equil = NO' as default because additional space is needed for it.
5963d6581fbSHong Zhang     */
5973d6581fbSHong Zhang     lu->options.Equil = NO;
598cffbb591SHong Zhang   } else if (ftype == MAT_FACTOR_ILU) {
5990c74a584SJed Brown     /* Set the default input options of ilu: */
600e77caa6dSBarry Smith     PetscStackCallExternalVoid("SuperLU:ilu_set_default_options", ilu_set_default_options(&lu->options));
601cffbb591SHong Zhang   }
6029ce50919SHong Zhang   lu->options.PrintStat = NO;
6031a2e2f44SHong Zhang 
6045d8b2efaSHong Zhang   /* Initialize the statistics variables. */
605e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:StatInit", StatInit(&lu->stat));
6068914a3f7SHong Zhang   lu->lwork = 0; /* allocate space internally by system malloc */
6079ce50919SHong Zhang 
6085d8b2efaSHong Zhang   /* Allocate spaces (notice sizes are for the transpose) */
6099566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &lu->etree));
6109566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lu->perm_r));
6119566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &lu->perm_c));
6129566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n, &lu->R));
6139566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(m, &lu->C));
6145d8b2efaSHong Zhang 
6155d8b2efaSHong Zhang   /* create rhs and solution x without allocate space for .Store */
6165d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX)
6173cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
618e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
619e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:cCreate_Dense_Matrix(", cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
6203cb270beSHong Zhang   #else
621e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
622e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:zCreate_Dense_Matrix", zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
6233cb270beSHong Zhang   #endif
6243cb270beSHong Zhang #else
6253cb270beSHong Zhang   #if defined(PETSC_USE_REAL_SINGLE)
626e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
627e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:sCreate_Dense_Matrix", sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
6285d8b2efaSHong Zhang   #else
629e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
630e77caa6dSBarry Smith   PetscStackCallExternalVoid("SuperLU:dCreate_Dense_Matrix", dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
6315d8b2efaSHong Zhang   #endif
6323cb270beSHong Zhang #endif
6335d8b2efaSHong Zhang 
6349566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_seqaij_superlu));
6359566063dSJacob Faibussowitsch   PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluSetILUDropTol_C", MatSuperluSetILUDropTol_SuperLU));
636245fece9SBarry Smith   B->data = lu;
63720be8e61SHong Zhang 
638899d7b4fSKris Buschelman   *F = B;
639*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
64014b396bbSKris Buschelman }
641d954db57SHong Zhang 
642d71ae5a4SJacob Faibussowitsch static PetscErrorCode MatGetFactor_seqsell_superlu(Mat A, MatFactorType ftype, Mat *F)
643d71ae5a4SJacob Faibussowitsch {
6442366e350SStefano Zampini   Mat_SuperLU *lu;
6452366e350SStefano Zampini 
6462366e350SStefano Zampini   PetscFunctionBegin;
6479566063dSJacob Faibussowitsch   PetscCall(MatGetFactor_seqaij_superlu(A, ftype, F));
6482366e350SStefano Zampini   lu                 = (Mat_SuperLU *)((*F)->data);
6492366e350SStefano Zampini   lu->needconversion = PETSC_TRUE;
650*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
6512366e350SStefano Zampini }
6522366e350SStefano Zampini 
653d71ae5a4SJacob Faibussowitsch PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU(void)
654d71ae5a4SJacob Faibussowitsch {
65542c9c57cSBarry Smith   PetscFunctionBegin;
6569566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_seqaij_superlu));
6579566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQAIJ, MAT_FACTOR_ILU, MatGetFactor_seqaij_superlu));
6589566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_seqsell_superlu));
6599566063dSJacob Faibussowitsch   PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU, MATSEQSELL, MAT_FACTOR_ILU, MatGetFactor_seqsell_superlu));
660*3ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
66142c9c57cSBarry Smith }
662