xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 147403d98689287189d69e47992b7c8152b2c9da)
114b396bbSKris Buschelman 
25a46d3faSBarry Smith /*  --------------------------------------------------------------------
35a46d3faSBarry Smith 
45a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
5c2b89b5dSBarry Smith      the SuperLU sparse solver.
614b396bbSKris Buschelman */
714b396bbSKris Buschelman 
85a46d3faSBarry Smith /*
95a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
105a46d3faSBarry Smith */
11c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
1214b396bbSKris Buschelman 
135a46d3faSBarry Smith /*
145a46d3faSBarry Smith      SuperLU include files
155a46d3faSBarry Smith */
1614b396bbSKris Buschelman EXTERN_C_BEGIN
1714b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
183cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
193cb270beSHong Zhang #include <slu_cdefs.h>
203cb270beSHong Zhang #else
21c6db04a5SJed Brown #include <slu_zdefs.h>
223cb270beSHong Zhang #endif
233cb270beSHong Zhang #else
243cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
253cb270beSHong Zhang #include <slu_sdefs.h>
2614b396bbSKris Buschelman #else
27c6db04a5SJed Brown #include <slu_ddefs.h>
2814b396bbSKris Buschelman #endif
293cb270beSHong Zhang #endif
30c6db04a5SJed Brown #include <slu_util.h>
3114b396bbSKris Buschelman EXTERN_C_END
3214b396bbSKris Buschelman 
335a46d3faSBarry Smith /*
34245fece9SBarry Smith      This is the data that defines the SuperLU factored matrix type
355a46d3faSBarry Smith */
3614b396bbSKris Buschelman typedef struct {
3775af56d4SHong Zhang   SuperMatrix       A,L,U,B,X;
3875af56d4SHong Zhang   superlu_options_t options;
39da7a1d00SHong Zhang   PetscInt          *perm_c; /* column permutation vector */
40da7a1d00SHong Zhang   PetscInt          *perm_r; /* row permutations from partial pivoting */
41da7a1d00SHong Zhang   PetscInt          *etree;
42da7a1d00SHong Zhang   PetscReal         *R, *C;
4375af56d4SHong Zhang   char              equed[1];
44da7a1d00SHong Zhang   PetscInt          lwork;
4575af56d4SHong Zhang   void              *work;
46da7a1d00SHong Zhang   PetscReal         rpg, rcond;
4775af56d4SHong Zhang   mem_usage_t       mem_usage;
4875af56d4SHong Zhang   MatStructure      flg;
495d8b2efaSHong Zhang   SuperLUStat_t     stat;
50cae5a23dSHong Zhang   Mat               A_dup;
51cae5a23dSHong Zhang   PetscScalar       *rhs_dup;
52c147c726SHong Zhang   GlobalLU_t        Glu;
532366e350SStefano Zampini   PetscBool         needconversion;
5414b396bbSKris Buschelman 
5514b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
56ace3abfcSBarry Smith   PetscBool CleanUpSuperLU;
57f0c56d0fSKris Buschelman } Mat_SuperLU;
5814b396bbSKris Buschelman 
595a46d3faSBarry Smith /*
605a46d3faSBarry Smith     Utility function
615a46d3faSBarry Smith */
62860c79edSBarry Smith static PetscErrorCode MatView_Info_SuperLU(Mat A,PetscViewer viewer)
635a46d3faSBarry Smith {
64245fece9SBarry Smith   Mat_SuperLU       *lu= (Mat_SuperLU*)A->data;
655a46d3faSBarry Smith   PetscErrorCode    ierr;
665a46d3faSBarry Smith   superlu_options_t options;
675a46d3faSBarry Smith 
685a46d3faSBarry Smith   PetscFunctionBegin;
695a46d3faSBarry Smith   options = lu->options;
702205254eSKarl Rupp 
715a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
725a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
73c0aa6a63SJacob Faibussowitsch   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %" PetscInt_FMT "\n",options.ColPerm);CHKERRQ(ierr);
74c0aa6a63SJacob Faibussowitsch   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %" PetscInt_FMT "\n",options.IterRefine);CHKERRQ(ierr);
755a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
765a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
775a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
785a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
79c0aa6a63SJacob Faibussowitsch   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %" PetscInt_FMT "\n",options.RowPerm);CHKERRQ(ierr);
805a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
815a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
82c0aa6a63SJacob Faibussowitsch   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %" PetscInt_FMT "\n",lu->lwork);CHKERRQ(ierr);
83d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_ILU) {
84cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
85cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
86cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
87c0aa6a63SJacob Faibussowitsch     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %" PetscInt_FMT "\n",options.ILU_DropRule);CHKERRQ(ierr);
88c0aa6a63SJacob Faibussowitsch     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %" PetscInt_FMT "\n",options.ILU_Norm);CHKERRQ(ierr);
89c0aa6a63SJacob Faibussowitsch     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %" PetscInt_FMT "\n",options.ILU_MILU);CHKERRQ(ierr);
90cffbb591SHong Zhang   }
915a46d3faSBarry Smith   PetscFunctionReturn(0);
925a46d3faSBarry Smith }
935a46d3faSBarry Smith 
94245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
95245fece9SBarry Smith {
96245fece9SBarry Smith   Mat_SuperLU       *lu = (Mat_SuperLU*)A->data;
97245fece9SBarry Smith   const PetscScalar *barray;
98245fece9SBarry Smith   PetscScalar       *xarray;
99245fece9SBarry Smith   PetscErrorCode    ierr;
100245fece9SBarry Smith   PetscInt          info,i,n;
101245fece9SBarry Smith   PetscReal         ferr,berr;
102245fece9SBarry Smith   static PetscBool  cite = PETSC_FALSE;
103245fece9SBarry Smith 
104245fece9SBarry Smith   PetscFunctionBegin;
105245fece9SBarry Smith   if (lu->lwork == -1) PetscFunctionReturn(0);
106245fece9SBarry Smith   ierr = PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",&cite);CHKERRQ(ierr);
107245fece9SBarry Smith 
108245fece9SBarry Smith   ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
109245fece9SBarry Smith   lu->B.ncol = 1;   /* Set the number of right-hand side */
110245fece9SBarry Smith   if (lu->options.Equil && !lu->rhs_dup) {
111245fece9SBarry Smith     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
112245fece9SBarry Smith     ierr = PetscMalloc1(n,&lu->rhs_dup);CHKERRQ(ierr);
113245fece9SBarry Smith   }
114245fece9SBarry Smith   if (lu->options.Equil) {
115245fece9SBarry Smith     /* Copy b into rsh_dup */
116245fece9SBarry Smith     ierr   = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
117580bdb30SBarry Smith     ierr   = PetscArraycpy(lu->rhs_dup,barray,n);CHKERRQ(ierr);
118245fece9SBarry Smith     ierr   = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
119245fece9SBarry Smith     barray = lu->rhs_dup;
120245fece9SBarry Smith   } else {
121245fece9SBarry Smith     ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
122245fece9SBarry Smith   }
123245fece9SBarry Smith   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
124245fece9SBarry Smith 
125245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
126245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
127245fece9SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
128245fece9SBarry Smith   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
129245fece9SBarry Smith #else
130245fece9SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
131245fece9SBarry Smith   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
132245fece9SBarry Smith #endif
133245fece9SBarry Smith #else
134245fece9SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (void*)barray;
135245fece9SBarry Smith   ((DNformat*)lu->X.Store)->nzval = xarray;
136245fece9SBarry Smith #endif
137245fece9SBarry Smith 
138245fece9SBarry Smith   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
139245fece9SBarry Smith   if (A->factortype == MAT_FACTOR_LU) {
140245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
141245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
142245fece9SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
143245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
144245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
145245fece9SBarry Smith #else
146245fece9SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
147245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
148245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
149245fece9SBarry Smith #endif
150245fece9SBarry Smith #else
151245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
152245fece9SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
153245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
154245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
155245fece9SBarry Smith #else
156245fece9SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
157245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
158245fece9SBarry Smith                                      &lu->Glu,&lu->mem_usage, &lu->stat, &info));
159245fece9SBarry Smith #endif
160245fece9SBarry Smith #endif
161245fece9SBarry Smith   } else if (A->factortype == MAT_FACTOR_ILU) {
162245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX)
163245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
164245fece9SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
165245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
166245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
167245fece9SBarry Smith #else
168245fece9SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
169245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
170245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
171245fece9SBarry Smith #endif
172245fece9SBarry Smith #else
173245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE)
174245fece9SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
175245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
176245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
177245fece9SBarry Smith #else
178245fece9SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
179245fece9SBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
180245fece9SBarry Smith                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
181245fece9SBarry Smith #endif
182245fece9SBarry Smith #endif
183245fece9SBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
184245fece9SBarry Smith   if (!lu->options.Equil) {
185245fece9SBarry Smith     ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
186245fece9SBarry Smith   }
187245fece9SBarry Smith   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
188245fece9SBarry Smith 
189245fece9SBarry Smith   if (!info || info == lu->A.ncol+1) {
190245fece9SBarry Smith     if (lu->options.IterRefine) {
191245fece9SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
192245fece9SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
193245fece9SBarry Smith       for (i = 0; i < 1; ++i) {
194245fece9SBarry Smith         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
195245fece9SBarry Smith       }
196245fece9SBarry Smith     }
197245fece9SBarry Smith   } else if (info > 0) {
198245fece9SBarry Smith     if (lu->lwork == -1) {
199c0aa6a63SJacob Faibussowitsch       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %" PetscInt_FMT " bytes\n", info - lu->A.ncol);
200245fece9SBarry Smith     } else {
201c0aa6a63SJacob Faibussowitsch       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %" PetscInt_FMT "\n",info);
202245fece9SBarry Smith     }
2032c71b3e2SJacob Faibussowitsch   } else PetscCheckFalse(info < 0,PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", info,-info);
204245fece9SBarry Smith 
205245fece9SBarry Smith   if (lu->options.PrintStat) {
206245fece9SBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
207245fece9SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
208245fece9SBarry Smith   }
209245fece9SBarry Smith   PetscFunctionReturn(0);
210245fece9SBarry Smith }
211245fece9SBarry Smith 
212245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
213245fece9SBarry Smith {
214245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)A->data;
215245fece9SBarry Smith   PetscErrorCode ierr;
216245fece9SBarry Smith 
217245fece9SBarry Smith   PetscFunctionBegin;
218603e8f96SBarry Smith   if (A->factorerrortype) {
219245fece9SBarry Smith     ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr);
220245fece9SBarry Smith     ierr = VecSetInf(x);CHKERRQ(ierr);
221245fece9SBarry Smith     PetscFunctionReturn(0);
222245fece9SBarry Smith   }
223245fece9SBarry Smith 
224245fece9SBarry Smith   lu->options.Trans = TRANS;
225245fece9SBarry Smith   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
226245fece9SBarry Smith   PetscFunctionReturn(0);
227245fece9SBarry Smith }
228245fece9SBarry Smith 
229245fece9SBarry Smith PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
230245fece9SBarry Smith {
231245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)A->data;
232245fece9SBarry Smith   PetscErrorCode ierr;
233245fece9SBarry Smith 
234245fece9SBarry Smith   PetscFunctionBegin;
235603e8f96SBarry Smith   if (A->factorerrortype) {
236245fece9SBarry Smith     ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr);
237245fece9SBarry Smith     ierr = VecSetInf(x);CHKERRQ(ierr);
238245fece9SBarry Smith     PetscFunctionReturn(0);
239245fece9SBarry Smith   }
240245fece9SBarry Smith 
241245fece9SBarry Smith   lu->options.Trans = NOTRANS;
242245fece9SBarry Smith   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
243245fece9SBarry Smith   PetscFunctionReturn(0);
244245fece9SBarry Smith }
245245fece9SBarry Smith 
246245fece9SBarry Smith static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
2475a46d3faSBarry Smith {
248245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)F->data;
249cae5a23dSHong Zhang   Mat_SeqAIJ     *aa;
2505a46d3faSBarry Smith   PetscErrorCode ierr;
2515a46d3faSBarry Smith   PetscInt       sinfo;
2525a46d3faSBarry Smith   PetscReal      ferr, berr;
2535a46d3faSBarry Smith   NCformat       *Ustore;
2545a46d3faSBarry Smith   SCformat       *Lstore;
2555a46d3faSBarry Smith 
2565a46d3faSBarry Smith   PetscFunctionBegin;
2575a46d3faSBarry Smith   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
2585a46d3faSBarry Smith     lu->options.Fact = SamePattern;
2595a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
2605a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
2612366e350SStefano Zampini     if (lu->A_dup) {
262cae5a23dSHong Zhang       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
263cae5a23dSHong Zhang     }
2642366e350SStefano Zampini 
2655a46d3faSBarry Smith     if (lu->lwork >= 0) {
266d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
267d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
2685a46d3faSBarry Smith       lu->options.Fact = SamePattern;
2695a46d3faSBarry Smith     }
2705a46d3faSBarry Smith   }
2715a46d3faSBarry Smith 
2725a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
2735a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
2745a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
2752366e350SStefano Zampini   if (lu->A_dup) {
276cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
277cae5a23dSHong Zhang   } else {
278cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(A)->data;
279cae5a23dSHong Zhang   }
2805a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2813cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
282d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
2833cb270beSHong Zhang #else
284d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
2853cb270beSHong Zhang #endif
2863cb270beSHong Zhang #else
2873cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
288d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
2895a46d3faSBarry Smith #else
290d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
2915a46d3faSBarry Smith #endif
2923cb270beSHong Zhang #endif
2935a46d3faSBarry Smith 
2945a46d3faSBarry Smith   /* Numerical factorization */
2955a46d3faSBarry Smith   lu->B.ncol = 0;  /* Indicate not to solve the system */
296d5f3da31SBarry Smith   if (F->factortype == MAT_FACTOR_LU) {
2975a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
2983cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
299d387c056SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3003cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
3015db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3023cb270beSHong Zhang #else
303d387c056SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3045a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
3055db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3063cb270beSHong Zhang #endif
3073cb270beSHong Zhang #else
3083cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
309d387c056SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3103cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
3115db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3125a46d3faSBarry Smith #else
313d387c056SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3145a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
315c147c726SHong Zhang                                      &lu->Glu,&lu->mem_usage, &lu->stat, &sinfo));
3165a46d3faSBarry Smith #endif
3173cb270beSHong Zhang #endif
318d5f3da31SBarry Smith   } else if (F->factortype == MAT_FACTOR_ILU) {
319cffbb591SHong Zhang     /* Compute the incomplete factorization, condition number and pivot growth */
320cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
3213cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
322d387c056SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
3233cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
3245db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3253cb270beSHong Zhang #else
326d387c056SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
327cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
3285db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
3293cb270beSHong Zhang #endif
3303cb270beSHong Zhang #else
3313cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
332d387c056SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3333cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
3345db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
335cffbb591SHong Zhang #else
336d387c056SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
337cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
338c147c726SHong Zhang                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
339cffbb591SHong Zhang #endif
3403cb270beSHong Zhang #endif
341f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
3425a46d3faSBarry Smith   if (!sinfo || sinfo == lu->A.ncol+1) {
3432205254eSKarl Rupp     if (lu->options.PivotGrowth) {
344675d1226SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);CHKERRQ(ierr);
3452205254eSKarl Rupp     }
3462205254eSKarl Rupp     if (lu->options.ConditionNumber) {
347675d1226SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);CHKERRQ(ierr);
3482205254eSKarl Rupp     }
3495a46d3faSBarry Smith   } else if (sinfo > 0) {
350675d1226SHong Zhang     if (A->erroriffailure) {
35198921bdaSJacob Faibussowitsch       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %" PetscInt_FMT,sinfo);
352675d1226SHong Zhang     } else {
353675d1226SHong Zhang       if (sinfo <= lu->A.ncol) {
354675d1226SHong Zhang         if (lu->options.ILU_FillTol == 0.0) {
355603e8f96SBarry Smith           F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
356675d1226SHong Zhang         }
3577d3de750SJacob Faibussowitsch         ierr = PetscInfo(F,"Number of zero pivots %" PetscInt_FMT ", ILU_FillTol %g\n",sinfo,lu->options.ILU_FillTol);CHKERRQ(ierr);
358675d1226SHong Zhang       } else if (sinfo == lu->A.ncol + 1) {
359675d1226SHong Zhang         /*
360675d1226SHong Zhang          U is nonsingular, but RCOND is less than machine
361675d1226SHong Zhang                       precision, meaning that the matrix is singular to
362675d1226SHong Zhang                       working precision. Nevertheless, the solution and
363675d1226SHong Zhang                       error bounds are computed because there are a number
364675d1226SHong Zhang                       of situations where the computed solution can be more
365675d1226SHong Zhang                       accurate than the value of RCOND would suggest.
366675d1226SHong Zhang          */
3677d3de750SJacob Faibussowitsch         ierr = PetscInfo(F,"Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %" PetscInt_FMT,sinfo);CHKERRQ(ierr);
368675d1226SHong Zhang       } else { /* sinfo > lu->A.ncol + 1 */
369603e8f96SBarry Smith         F->factorerrortype = MAT_FACTOR_OUTMEMORY;
3707d3de750SJacob Faibussowitsch         ierr = PetscInfo(F,"Number of bytes allocated when memory allocation fails %" PetscInt_FMT "\n",sinfo);CHKERRQ(ierr);
371675d1226SHong Zhang       }
372675d1226SHong Zhang     }
37398921bdaSJacob Faibussowitsch   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", sinfo,-sinfo);
3745a46d3faSBarry Smith 
3755a46d3faSBarry Smith   if (lu->options.PrintStat) {
376675d1226SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");CHKERRQ(ierr);
377d387c056SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
3785a46d3faSBarry Smith     Lstore = (SCformat*) lu->L.Store;
3795a46d3faSBarry Smith     Ustore = (NCformat*) lu->U.Store;
380c0aa6a63SJacob Faibussowitsch     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %" PetscInt_FMT "\n", Lstore->nnz);
381c0aa6a63SJacob Faibussowitsch     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %" PetscInt_FMT "\n", Ustore->nnz);
382c0aa6a63SJacob Faibussowitsch     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %" PetscInt_FMT "\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
3836da386baSHong Zhang     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
3846da386baSHong Zhang                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
3855a46d3faSBarry Smith   }
3865a46d3faSBarry Smith 
3875a46d3faSBarry Smith   lu->flg                = SAME_NONZERO_PATTERN;
3881d5ca7f3SHong Zhang   F->ops->solve          = MatSolve_SuperLU;
3891d5ca7f3SHong Zhang   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
3901aef8b4cSStefano Zampini   F->ops->matsolve       = NULL;
3915a46d3faSBarry Smith   PetscFunctionReturn(0);
3925a46d3faSBarry Smith }
3935a46d3faSBarry Smith 
394245fece9SBarry Smith static PetscErrorCode MatDestroy_SuperLU(Mat A)
39514b396bbSKris Buschelman {
396dfbe8321SBarry Smith   PetscErrorCode ierr;
397245fece9SBarry Smith   Mat_SuperLU    *lu=(Mat_SuperLU*)A->data;
39814b396bbSKris Buschelman 
39914b396bbSKris Buschelman   PetscFunctionBegin;
400245fece9SBarry Smith   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
401d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
402d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
403d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
404d387c056SBarry Smith     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
4050e742b69SHong Zhang     if (lu->lwork >= 0) {
406d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
407d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
4080e742b69SHong Zhang     }
4090e742b69SHong Zhang   }
4109ce50919SHong Zhang   ierr = PetscFree(lu->etree);CHKERRQ(ierr);
4119ce50919SHong Zhang   ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
4129ce50919SHong Zhang   ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
4139ce50919SHong Zhang   ierr = PetscFree(lu->R);CHKERRQ(ierr);
4149ce50919SHong Zhang   ierr = PetscFree(lu->C);CHKERRQ(ierr);
415bf0cc555SLisandro Dalcin   ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
416bf0cc555SLisandro Dalcin   ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
417245fece9SBarry Smith   ierr = PetscFree(A->data);CHKERRQ(ierr);
4180e742b69SHong Zhang 
419d954db57SHong Zhang   /* clear composed functions */
4203ca39a21SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr);
421bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr);
42214b396bbSKris Buschelman   PetscFunctionReturn(0);
42314b396bbSKris Buschelman }
42414b396bbSKris Buschelman 
425245fece9SBarry Smith static PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
42614b396bbSKris Buschelman {
427dfbe8321SBarry Smith   PetscErrorCode    ierr;
428ace3abfcSBarry Smith   PetscBool         iascii;
42914b396bbSKris Buschelman   PetscViewerFormat format;
43014b396bbSKris Buschelman 
43114b396bbSKris Buschelman   PetscFunctionBegin;
432251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
43332077d6dSBarry Smith   if (iascii) {
43414b396bbSKris Buschelman     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
4352f59403fSHong Zhang     if (format == PETSC_VIEWER_ASCII_INFO) {
436860c79edSBarry Smith       ierr = MatView_Info_SuperLU(A,viewer);CHKERRQ(ierr);
43714b396bbSKris Buschelman     }
43814b396bbSKris Buschelman   }
43914b396bbSKris Buschelman   PetscFunctionReturn(0);
44014b396bbSKris Buschelman }
44114b396bbSKris Buschelman 
442e0b74bf9SHong Zhang PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
443e0b74bf9SHong Zhang {
444245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)A->data;
445cd723cd1SBarry Smith   PetscBool      flg;
446cd723cd1SBarry Smith   PetscErrorCode ierr;
447e0b74bf9SHong Zhang 
448e0b74bf9SHong Zhang   PetscFunctionBegin;
4490298fd71SBarry Smith   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
4502c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
4510298fd71SBarry Smith   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
4522c71b3e2SJacob Faibussowitsch   PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
4532205254eSKarl Rupp   lu->options.Trans = TRANS;
454e0b74bf9SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
455e0b74bf9SHong Zhang   PetscFunctionReturn(0);
456e0b74bf9SHong Zhang }
457e0b74bf9SHong Zhang 
458245fece9SBarry Smith static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
45914b396bbSKris Buschelman {
460245fece9SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)(F->data);
4612366e350SStefano Zampini   PetscErrorCode ierr;
462b24902e0SBarry Smith 
463b24902e0SBarry Smith   PetscFunctionBegin;
464b24902e0SBarry Smith   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
465b24902e0SBarry Smith   lu->CleanUpSuperLU      = PETSC_TRUE;
4661d5ca7f3SHong Zhang   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
4672366e350SStefano Zampini 
4682366e350SStefano Zampini   /* if we are here, the nonzero pattern has changed unless the user explicitly called MatLUFactorSymbolic */
4692366e350SStefano Zampini   ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
4702366e350SStefano Zampini   if (lu->needconversion) {
4712366e350SStefano Zampini     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&lu->A_dup);CHKERRQ(ierr);
4722366e350SStefano Zampini   }
4732366e350SStefano Zampini   if (lu->options.Equil == YES && !lu->A_dup) { /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
4742366e350SStefano Zampini     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
4752366e350SStefano Zampini   }
476b24902e0SBarry Smith   PetscFunctionReturn(0);
477b24902e0SBarry Smith }
478b24902e0SBarry Smith 
479b2573a8aSBarry Smith static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
4805ccb76cbSHong Zhang {
481245fece9SBarry Smith   Mat_SuperLU *lu= (Mat_SuperLU*)F->data;
4825ccb76cbSHong Zhang 
4835ccb76cbSHong Zhang   PetscFunctionBegin;
4845ccb76cbSHong Zhang   lu->options.ILU_DropTol = dtol;
4855ccb76cbSHong Zhang   PetscFunctionReturn(0);
4865ccb76cbSHong Zhang }
4875ccb76cbSHong Zhang 
4885ccb76cbSHong Zhang /*@
4895ccb76cbSHong Zhang   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
490*147403d9SBarry Smith 
4915ccb76cbSHong Zhang    Logically Collective on Mat
4925ccb76cbSHong Zhang 
4935ccb76cbSHong Zhang    Input Parameters:
4945ccb76cbSHong Zhang +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
4955ccb76cbSHong Zhang -  dtol - drop tolerance
4965ccb76cbSHong Zhang 
4975ccb76cbSHong Zhang   Options Database:
498*147403d9SBarry Smith .   -mat_superlu_ilu_droptol <dtol> - the drop tolerance
4995ccb76cbSHong Zhang 
5005ccb76cbSHong Zhang    Level: beginner
5015ccb76cbSHong Zhang 
50296a0c994SBarry Smith    References:
503606c0280SSatish Balay .  * - SuperLU Users' Guide
5045ccb76cbSHong Zhang 
5055ccb76cbSHong Zhang .seealso: MatGetFactor()
5065ccb76cbSHong Zhang @*/
5075ccb76cbSHong Zhang PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
5085ccb76cbSHong Zhang {
5095ccb76cbSHong Zhang   PetscErrorCode ierr;
5105ccb76cbSHong Zhang 
5115ccb76cbSHong Zhang   PetscFunctionBegin;
5125ccb76cbSHong Zhang   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
51369fbec6eSBarry Smith   PetscValidLogicalCollectiveReal(F,dtol,2);
5145ccb76cbSHong Zhang   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
5155ccb76cbSHong Zhang   PetscFunctionReturn(0);
5165ccb76cbSHong Zhang }
5175ccb76cbSHong Zhang 
518ea799195SBarry Smith PetscErrorCode MatFactorGetSolverType_seqaij_superlu(Mat A,MatSolverType *type)
51935bd34faSBarry Smith {
52035bd34faSBarry Smith   PetscFunctionBegin;
5212692d6eeSBarry Smith   *type = MATSOLVERSUPERLU;
52235bd34faSBarry Smith   PetscFunctionReturn(0);
52335bd34faSBarry Smith }
52435bd34faSBarry Smith 
525b24902e0SBarry Smith /*MC
526ba992d64SSatish Balay   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
527b24902e0SBarry Smith   via the external package SuperLU.
528b24902e0SBarry Smith 
529e2e64c6bSBarry Smith   Use ./configure --download-superlu to have PETSc installed with SuperLU
530b24902e0SBarry Smith 
5313ca39a21SBarry Smith   Use -pc_type lu -pc_factor_mat_solver_type superlu to use this direct solver
532c2b89b5dSBarry Smith 
533b24902e0SBarry Smith   Options Database Keys:
534e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE>            - Equil (None)
535e08999f5SMatthew G Knepley . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
536e08999f5SMatthew G Knepley . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
537e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
538e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
539e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
540e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
541e08999f5SMatthew G Knepley . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
542e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
543e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE>        - PrintStat (None)
544e08999f5SMatthew G Knepley . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
545e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
546e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
547e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
548e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
549e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
550e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
551b24902e0SBarry Smith 
55295452b02SPatrick Sanan    Notes:
55395452b02SPatrick Sanan     Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
5545c9eb25fSBarry Smith 
5552c7c0729SBarry Smith     Cannot currently use ordering provided by PETSc.
5562c7c0729SBarry Smith 
557b24902e0SBarry Smith    Level: beginner
558b24902e0SBarry Smith 
5593ca39a21SBarry Smith .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverType(), MatSolverType
560b24902e0SBarry Smith M*/
561b24902e0SBarry Smith 
562db87b0f2SBarry Smith static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
563b24902e0SBarry Smith {
56414b396bbSKris Buschelman   Mat            B;
565f0c56d0fSKris Buschelman   Mat_SuperLU    *lu;
5666849ba73SBarry Smith   PetscErrorCode ierr;
5675d8b2efaSHong Zhang   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
5688afaa268SBarry Smith   PetscBool      flg,set;
5693cb270beSHong Zhang   PetscReal      real_input;
5705ca28756SHong Zhang   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
5715ca28756SHong Zhang   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
5725ca28756SHong Zhang   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
57314b396bbSKris Buschelman 
57414b396bbSKris Buschelman   PetscFunctionBegin;
575ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
576d0f46423SBarry Smith   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
577245fece9SBarry Smith   ierr = PetscStrallocpy("superlu",&((PetscObject)B)->type_name);CHKERRQ(ierr);
578245fece9SBarry Smith   ierr = MatSetUp(B);CHKERRQ(ierr);
57966e17bc3SBarry Smith   B->trivialsymbolic = PETSC_TRUE;
580cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
581b24902e0SBarry Smith     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
582cffbb591SHong Zhang     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
583b3a44c85SBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
584cffbb591SHong Zhang 
58500c67f3bSHong Zhang   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
58600c67f3bSHong Zhang   ierr = PetscStrallocpy(MATSOLVERSUPERLU,&B->solvertype);CHKERRQ(ierr);
58700c67f3bSHong Zhang 
588b9c12af5SBarry Smith   B->ops->getinfo     = MatGetInfo_External;
589b24902e0SBarry Smith   B->ops->destroy     = MatDestroy_SuperLU;
5903519fcdcSHong Zhang   B->ops->view        = MatView_SuperLU;
591d5f3da31SBarry Smith   B->factortype       = ftype;
59294b7f48cSBarry Smith   B->assembled        = PETSC_TRUE;           /* required by -ksp_view */
5935c9eb25fSBarry Smith   B->preallocated     = PETSC_TRUE;
59414b396bbSKris Buschelman 
595b00a9115SJed Brown   ierr = PetscNewLog(B,&lu);CHKERRQ(ierr);
596cae5a23dSHong Zhang 
597cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU) {
5989ce50919SHong Zhang     set_default_options(&lu->options);
5993d6581fbSHong Zhang     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
6003d6581fbSHong Zhang       "Whether or not the system will be equilibrated depends on the
6013d6581fbSHong Zhang        scaling of the matrix A, but if equilibration is used, A is
6023d6581fbSHong Zhang        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
6033d6581fbSHong Zhang        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
6043d6581fbSHong Zhang      We set 'options.Equil = NO' as default because additional space is needed for it.
6053d6581fbSHong Zhang     */
6063d6581fbSHong Zhang     lu->options.Equil = NO;
607cffbb591SHong Zhang   } else if (ftype == MAT_FACTOR_ILU) {
6080c74a584SJed Brown     /* Set the default input options of ilu: */
609d387c056SBarry Smith     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
610cffbb591SHong Zhang   }
6119ce50919SHong Zhang   lu->options.PrintStat = NO;
6121a2e2f44SHong Zhang 
6135d8b2efaSHong Zhang   /* Initialize the statistics variables. */
614d387c056SBarry Smith   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
6158914a3f7SHong Zhang   lu->lwork = 0;   /* allocate space internally by system malloc */
6169ce50919SHong Zhang 
617ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
6188afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,NULL);CHKERRQ(ierr);
6198914a3f7SHong Zhang   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
6202205254eSKarl Rupp   if (flg) lu->options.ColPerm = (colperm_t)indx;
6218914a3f7SHong Zhang   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
6222205254eSKarl Rupp   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
6238afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,&set);CHKERRQ(ierr);
6248afaa268SBarry Smith   if (set && flg) lu->options.SymmetricMode = YES;
6253cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
6263cb270beSHong Zhang   if (flg) lu->options.DiagPivotThresh = (double) real_input;
6278afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,&set);CHKERRQ(ierr);
6288afaa268SBarry Smith   if (set && flg) lu->options.PivotGrowth = YES;
6298afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,&set);CHKERRQ(ierr);
6308afaa268SBarry Smith   if (set && flg) lu->options.ConditionNumber = YES;
631d7ebd59bSHong Zhang   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
6322205254eSKarl Rupp   if (flg) lu->options.RowPerm = (rowperm_t)indx;
6338afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,&set);CHKERRQ(ierr);
6348afaa268SBarry Smith   if (set && flg) lu->options.ReplaceTinyPivot = YES;
6358afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,&set);CHKERRQ(ierr);
6368afaa268SBarry Smith   if (set && flg) lu->options.PrintStat = YES;
6370298fd71SBarry Smith   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
6385fe6150dSHong Zhang   if (lu->lwork > 0) {
639d87de817SBarry Smith     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
6405fe6150dSHong Zhang     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
6415fe6150dSHong Zhang   } else if (lu->lwork != 0 && lu->lwork != -1) {
642c0aa6a63SJacob Faibussowitsch     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %" PetscInt_FMT " is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
6438914a3f7SHong Zhang     lu->lwork = 0;
6448914a3f7SHong Zhang   }
645cffbb591SHong Zhang   /* ilu options */
6463cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
6473cb270beSHong Zhang   if (flg) lu->options.ILU_DropTol = (double) real_input;
6483cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
6493cb270beSHong Zhang   if (flg) lu->options.ILU_FillTol = (double) real_input;
6503cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
6513cb270beSHong Zhang   if (flg) lu->options.ILU_FillFactor = (double) real_input;
6520298fd71SBarry Smith   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
653cffbb591SHong Zhang   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
6542205254eSKarl Rupp   if (flg) lu->options.ILU_Norm = (norm_t)indx;
655cffbb591SHong Zhang   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
6562205254eSKarl Rupp   if (flg) lu->options.ILU_MILU = (milu_t)indx;
657245fece9SBarry Smith   ierr = PetscOptionsEnd();CHKERRQ(ierr);
6589ce50919SHong Zhang 
6595d8b2efaSHong Zhang   /* Allocate spaces (notice sizes are for the transpose) */
660785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->etree);CHKERRQ(ierr);
661785e854fSJed Brown   ierr = PetscMalloc1(n,&lu->perm_r);CHKERRQ(ierr);
662785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->perm_c);CHKERRQ(ierr);
663785e854fSJed Brown   ierr = PetscMalloc1(n,&lu->R);CHKERRQ(ierr);
664785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->C);CHKERRQ(ierr);
6655d8b2efaSHong Zhang 
6665d8b2efaSHong Zhang   /* create rhs and solution x without allocate space for .Store */
6675d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX)
6683cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
669d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
670d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
6713cb270beSHong Zhang #else
672d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
673d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
6743cb270beSHong Zhang #endif
6753cb270beSHong Zhang #else
6763cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
677d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
678d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
6795d8b2efaSHong Zhang #else
680d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
681d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
6825d8b2efaSHong Zhang #endif
6833cb270beSHong Zhang #endif
6845d8b2efaSHong Zhang 
6853ca39a21SBarry Smith   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_seqaij_superlu);CHKERRQ(ierr);
686bdf89e91SBarry Smith   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
687245fece9SBarry Smith   B->data  = lu;
68820be8e61SHong Zhang 
689899d7b4fSKris Buschelman   *F       = B;
69014b396bbSKris Buschelman   PetscFunctionReturn(0);
69114b396bbSKris Buschelman }
692d954db57SHong Zhang 
6932366e350SStefano Zampini static PetscErrorCode MatGetFactor_seqsell_superlu(Mat A,MatFactorType ftype,Mat *F)
6942366e350SStefano Zampini {
6952366e350SStefano Zampini   Mat_SuperLU    *lu;
6962366e350SStefano Zampini   PetscErrorCode ierr;
6972366e350SStefano Zampini 
6982366e350SStefano Zampini   PetscFunctionBegin;
6992366e350SStefano Zampini   ierr = MatGetFactor_seqaij_superlu(A,ftype,F);CHKERRQ(ierr);
7002366e350SStefano Zampini   lu   = (Mat_SuperLU*)((*F)->data);
7012366e350SStefano Zampini   lu->needconversion = PETSC_TRUE;
7022366e350SStefano Zampini   PetscFunctionReturn(0);
7032366e350SStefano Zampini }
7042366e350SStefano Zampini 
7053ca39a21SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU(void)
70642c9c57cSBarry Smith {
70742c9c57cSBarry Smith   PetscErrorCode ierr;
70842c9c57cSBarry Smith 
70942c9c57cSBarry Smith   PetscFunctionBegin;
7103ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
7113ca39a21SBarry Smith   ierr = MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQAIJ,MAT_FACTOR_ILU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
7122366e350SStefano Zampini   ierr = MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_seqsell_superlu);CHKERRQ(ierr);
7132366e350SStefano Zampini   ierr = MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQSELL,MAT_FACTOR_ILU,MatGetFactor_seqsell_superlu);CHKERRQ(ierr);
71442c9c57cSBarry Smith   PetscFunctionReturn(0);
71542c9c57cSBarry Smith }
716