xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 00c67f3b09c0bcda06af5ed306d845d9138e5003)
114b396bbSKris Buschelman 
25a46d3faSBarry Smith /*  --------------------------------------------------------------------
35a46d3faSBarry Smith 
45a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
5c2b89b5dSBarry Smith      the SuperLU sparse solver.
614b396bbSKris Buschelman */
714b396bbSKris Buschelman 
85a46d3faSBarry Smith /*
95a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
105a46d3faSBarry Smith */
11c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
1214b396bbSKris Buschelman 
135a46d3faSBarry Smith /*
145a46d3faSBarry Smith      SuperLU include files
155a46d3faSBarry Smith */
1614b396bbSKris Buschelman EXTERN_C_BEGIN
1714b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
183cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
193cb270beSHong Zhang #include <slu_cdefs.h>
203cb270beSHong Zhang #else
21c6db04a5SJed Brown #include <slu_zdefs.h>
223cb270beSHong Zhang #endif
233cb270beSHong Zhang #else
243cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
253cb270beSHong Zhang #include <slu_sdefs.h>
2614b396bbSKris Buschelman #else
27c6db04a5SJed Brown #include <slu_ddefs.h>
2814b396bbSKris Buschelman #endif
293cb270beSHong Zhang #endif
30c6db04a5SJed Brown #include <slu_util.h>
3114b396bbSKris Buschelman EXTERN_C_END
3214b396bbSKris Buschelman 
335a46d3faSBarry Smith /*
345a46d3faSBarry Smith      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
355a46d3faSBarry Smith */
3614b396bbSKris Buschelman typedef struct {
3775af56d4SHong Zhang   SuperMatrix       A,L,U,B,X;
3875af56d4SHong Zhang   superlu_options_t options;
39da7a1d00SHong Zhang   PetscInt          *perm_c; /* column permutation vector */
40da7a1d00SHong Zhang   PetscInt          *perm_r; /* row permutations from partial pivoting */
41da7a1d00SHong Zhang   PetscInt          *etree;
42da7a1d00SHong Zhang   PetscReal         *R, *C;
4375af56d4SHong Zhang   char              equed[1];
44da7a1d00SHong Zhang   PetscInt          lwork;
4575af56d4SHong Zhang   void              *work;
46da7a1d00SHong Zhang   PetscReal         rpg, rcond;
4775af56d4SHong Zhang   mem_usage_t       mem_usage;
4875af56d4SHong Zhang   MatStructure      flg;
495d8b2efaSHong Zhang   SuperLUStat_t     stat;
50cae5a23dSHong Zhang   Mat               A_dup;
51cae5a23dSHong Zhang   PetscScalar       *rhs_dup;
52c147c726SHong Zhang   GlobalLU_t        Glu;
5314b396bbSKris Buschelman 
5414b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
55ace3abfcSBarry Smith   PetscBool CleanUpSuperLU;
56f0c56d0fSKris Buschelman } Mat_SuperLU;
5714b396bbSKris Buschelman 
58e0e586b9SHong Zhang extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
599a625307SHong Zhang extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo*);
60e0e586b9SHong Zhang extern PetscErrorCode MatDestroy_SuperLU(Mat);
61e0e586b9SHong Zhang extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
62e0e586b9SHong Zhang extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
63e0e586b9SHong Zhang extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
64e0b74bf9SHong Zhang extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
65e0e586b9SHong Zhang extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
669a625307SHong Zhang extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
67e0e586b9SHong Zhang extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat*);
68e0e586b9SHong Zhang 
695a46d3faSBarry Smith /*
705a46d3faSBarry Smith     Utility function
715a46d3faSBarry Smith */
725a46d3faSBarry Smith #undef __FUNCT__
735a46d3faSBarry Smith #define __FUNCT__ "MatFactorInfo_SuperLU"
745a46d3faSBarry Smith PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
755a46d3faSBarry Smith {
765a46d3faSBarry Smith   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
775a46d3faSBarry Smith   PetscErrorCode    ierr;
785a46d3faSBarry Smith   superlu_options_t options;
795a46d3faSBarry Smith 
805a46d3faSBarry Smith   PetscFunctionBegin;
815a46d3faSBarry Smith   /* check if matrix is superlu_dist type */
825a46d3faSBarry Smith   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
835a46d3faSBarry Smith 
845a46d3faSBarry Smith   options = lu->options;
852205254eSKarl Rupp 
865a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
875a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
885a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
895a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
905a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
915a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
925a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
935a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
945a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
955a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
965a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
975a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
98d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_ILU) {
99cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
100cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
101cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
102cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
103cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
104cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
105cffbb591SHong Zhang   }
1065a46d3faSBarry Smith   PetscFunctionReturn(0);
1075a46d3faSBarry Smith }
1085a46d3faSBarry Smith 
1095a46d3faSBarry Smith /*
1105a46d3faSBarry Smith     These are the methods provided to REPLACE the corresponding methods of the
1115a46d3faSBarry Smith    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
1125a46d3faSBarry Smith */
1135a46d3faSBarry Smith #undef __FUNCT__
1145a46d3faSBarry Smith #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
1159a625307SHong Zhang PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
1165a46d3faSBarry Smith {
1171d5ca7f3SHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
118cae5a23dSHong Zhang   Mat_SeqAIJ     *aa;
1195a46d3faSBarry Smith   PetscErrorCode ierr;
1205a46d3faSBarry Smith   PetscInt       sinfo;
1215a46d3faSBarry Smith   PetscReal      ferr, berr;
1225a46d3faSBarry Smith   NCformat       *Ustore;
1235a46d3faSBarry Smith   SCformat       *Lstore;
1245a46d3faSBarry Smith 
1255a46d3faSBarry Smith   PetscFunctionBegin;
1265a46d3faSBarry Smith   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
1275a46d3faSBarry Smith     lu->options.Fact = SamePattern;
1285a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
1295a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
130cae5a23dSHong Zhang     if (lu->options.Equil) {
131cae5a23dSHong Zhang       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
132cae5a23dSHong Zhang     }
1335a46d3faSBarry Smith     if (lu->lwork >= 0) {
134d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
135d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
1365a46d3faSBarry Smith       lu->options.Fact = SamePattern;
1375a46d3faSBarry Smith     }
1385a46d3faSBarry Smith   }
1395a46d3faSBarry Smith 
1405a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
1415a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
1425a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
143cae5a23dSHong Zhang   if (lu->options.Equil) {
144cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
145cae5a23dSHong Zhang   } else {
146cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(A)->data;
147cae5a23dSHong Zhang   }
1485a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
1493cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
150d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
1513cb270beSHong Zhang #else
152d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
1533cb270beSHong Zhang #endif
1543cb270beSHong Zhang #else
1553cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
156d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
1575a46d3faSBarry Smith #else
158d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
1595a46d3faSBarry Smith #endif
1603cb270beSHong Zhang #endif
1615a46d3faSBarry Smith 
1625a46d3faSBarry Smith   /* Numerical factorization */
1635a46d3faSBarry Smith   lu->B.ncol = 0;  /* Indicate not to solve the system */
164d5f3da31SBarry Smith   if (F->factortype == MAT_FACTOR_LU) {
1655a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
1663cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
167d387c056SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1683cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
1695db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
1703cb270beSHong Zhang #else
171d387c056SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1725a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
1735db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
1743cb270beSHong Zhang #endif
1753cb270beSHong Zhang #else
1763cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
177d387c056SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1783cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
1795db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
1805a46d3faSBarry Smith #else
181d387c056SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1825a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
183c147c726SHong Zhang                                      &lu->Glu,&lu->mem_usage, &lu->stat, &sinfo));
1845a46d3faSBarry Smith #endif
1853cb270beSHong Zhang #endif
186d5f3da31SBarry Smith   } else if (F->factortype == MAT_FACTOR_ILU) {
187cffbb591SHong Zhang     /* Compute the incomplete factorization, condition number and pivot growth */
188cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
1893cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
190d387c056SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
1913cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
1925db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
1933cb270beSHong Zhang #else
194d387c056SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
195cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
1965db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
1973cb270beSHong Zhang #endif
1983cb270beSHong Zhang #else
1993cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
200d387c056SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
2013cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
2025db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
203cffbb591SHong Zhang #else
204d387c056SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
205cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
206c147c726SHong Zhang                                      &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo));
207cffbb591SHong Zhang #endif
2083cb270beSHong Zhang #endif
209f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
2105a46d3faSBarry Smith   if (!sinfo || sinfo == lu->A.ncol+1) {
2112205254eSKarl Rupp     if (lu->options.PivotGrowth) {
212675d1226SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);CHKERRQ(ierr);
2132205254eSKarl Rupp     }
2142205254eSKarl Rupp     if (lu->options.ConditionNumber) {
215675d1226SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);CHKERRQ(ierr);
2162205254eSKarl Rupp     }
2175a46d3faSBarry Smith   } else if (sinfo > 0) {
218675d1226SHong Zhang     if (A->erroriffailure) {
219675d1226SHong Zhang       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
220675d1226SHong Zhang     } else {
221675d1226SHong Zhang       if (sinfo <= lu->A.ncol) {
222675d1226SHong Zhang         if (lu->options.ILU_FillTol == 0.0) {
223675d1226SHong Zhang           F->errortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
224675d1226SHong Zhang         }
225675d1226SHong Zhang         ierr = PetscInfo2(F,"Number of zero pivots %D, ILU_FillTol %g\n",sinfo,lu->options.ILU_FillTol);CHKERRQ(ierr);
226675d1226SHong Zhang       } else if (sinfo == lu->A.ncol + 1) {
227675d1226SHong Zhang         /*
228675d1226SHong Zhang          U is nonsingular, but RCOND is less than machine
229675d1226SHong Zhang  		      precision, meaning that the matrix is singular to
230675d1226SHong Zhang  		      working precision. Nevertheless, the solution and
231675d1226SHong Zhang  		      error bounds are computed because there are a number
232675d1226SHong Zhang  		      of situations where the computed solution can be more
233675d1226SHong Zhang  		      accurate than the value of RCOND would suggest.
234675d1226SHong Zhang          */
235675d1226SHong Zhang         ierr = PetscInfo1(F,"Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %D",sinfo);CHKERRQ(ierr);
236675d1226SHong Zhang       } else { /* sinfo > lu->A.ncol + 1 */
237675d1226SHong Zhang         F->errortype = MAT_FACTOR_OUTMEMORY;
238675d1226SHong Zhang         ierr = PetscInfo1(F,"Number of bytes allocated when memory allocation fails %D\n",sinfo);CHKERRQ(ierr);
239675d1226SHong Zhang       }
240675d1226SHong Zhang     }
241f23aa3ddSBarry Smith   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
2425a46d3faSBarry Smith 
2435a46d3faSBarry Smith   if (lu->options.PrintStat) {
244675d1226SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");CHKERRQ(ierr);
245d387c056SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
2465a46d3faSBarry Smith     Lstore = (SCformat*) lu->L.Store;
2475a46d3faSBarry Smith     Ustore = (NCformat*) lu->U.Store;
2485a46d3faSBarry Smith     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
2495a46d3faSBarry Smith     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
2505a46d3faSBarry Smith     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
2516da386baSHong Zhang     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
2526da386baSHong Zhang                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
2535a46d3faSBarry Smith   }
2545a46d3faSBarry Smith 
2555a46d3faSBarry Smith   lu->flg                = SAME_NONZERO_PATTERN;
2561d5ca7f3SHong Zhang   F->ops->solve          = MatSolve_SuperLU;
2571d5ca7f3SHong Zhang   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
2581aef8b4cSStefano Zampini   F->ops->matsolve       = NULL;
2595a46d3faSBarry Smith   PetscFunctionReturn(0);
2605a46d3faSBarry Smith }
2615a46d3faSBarry Smith 
26214b396bbSKris Buschelman #undef __FUNCT__
26320be8e61SHong Zhang #define __FUNCT__ "MatGetDiagonal_SuperLU"
26420be8e61SHong Zhang PetscErrorCode MatGetDiagonal_SuperLU(Mat A,Vec v)
26520be8e61SHong Zhang {
26620be8e61SHong Zhang   PetscFunctionBegin;
26720be8e61SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Mat type: SuperLU factor");
26820be8e61SHong Zhang   PetscFunctionReturn(0);
26920be8e61SHong Zhang }
27020be8e61SHong Zhang 
27120be8e61SHong Zhang #undef __FUNCT__
272f0c56d0fSKris Buschelman #define __FUNCT__ "MatDestroy_SuperLU"
273dfbe8321SBarry Smith PetscErrorCode MatDestroy_SuperLU(Mat A)
27414b396bbSKris Buschelman {
275dfbe8321SBarry Smith   PetscErrorCode ierr;
276f0c56d0fSKris Buschelman   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
27714b396bbSKris Buschelman 
27814b396bbSKris Buschelman   PetscFunctionBegin;
279bf0cc555SLisandro Dalcin   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
280d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
281d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
282d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
283d387c056SBarry Smith     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
2840e742b69SHong Zhang     if (lu->lwork >= 0) {
285d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
286d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
2870e742b69SHong Zhang     }
2880e742b69SHong Zhang   }
289bf0cc555SLisandro Dalcin   if (lu) {
2909ce50919SHong Zhang     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
2919ce50919SHong Zhang     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
2929ce50919SHong Zhang     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
2939ce50919SHong Zhang     ierr = PetscFree(lu->R);CHKERRQ(ierr);
2949ce50919SHong Zhang     ierr = PetscFree(lu->C);CHKERRQ(ierr);
295bf0cc555SLisandro Dalcin     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
296bf0cc555SLisandro Dalcin     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
297bf0cc555SLisandro Dalcin   }
298bf0cc555SLisandro Dalcin   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
2990e742b69SHong Zhang 
300d954db57SHong Zhang   /* clear composed functions */
301bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
302bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr);
303d954db57SHong Zhang 
304b24902e0SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
30514b396bbSKris Buschelman   PetscFunctionReturn(0);
30614b396bbSKris Buschelman }
30714b396bbSKris Buschelman 
30814b396bbSKris Buschelman #undef __FUNCT__
309f0c56d0fSKris Buschelman #define __FUNCT__ "MatView_SuperLU"
310dfbe8321SBarry Smith PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
31114b396bbSKris Buschelman {
312dfbe8321SBarry Smith   PetscErrorCode    ierr;
313ace3abfcSBarry Smith   PetscBool         iascii;
31414b396bbSKris Buschelman   PetscViewerFormat format;
31514b396bbSKris Buschelman 
31614b396bbSKris Buschelman   PetscFunctionBegin;
317251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
31832077d6dSBarry Smith   if (iascii) {
31914b396bbSKris Buschelman     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
3202f59403fSHong Zhang     if (format == PETSC_VIEWER_ASCII_INFO) {
321f0c56d0fSKris Buschelman       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
32214b396bbSKris Buschelman     }
32314b396bbSKris Buschelman   }
32414b396bbSKris Buschelman   PetscFunctionReturn(0);
32514b396bbSKris Buschelman }
32614b396bbSKris Buschelman 
32714b396bbSKris Buschelman 
32814b396bbSKris Buschelman #undef __FUNCT__
329c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU_Private"
330c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
33114b396bbSKris Buschelman {
332f0c56d0fSKris Buschelman   Mat_SuperLU       *lu = (Mat_SuperLU*)A->spptr;
333d9ca1df4SBarry Smith   const PetscScalar *barray;
334d9ca1df4SBarry Smith   PetscScalar       *xarray;
335dfbe8321SBarry Smith   PetscErrorCode    ierr;
336077aedafSJed Brown   PetscInt          info,i,n;
337da7a1d00SHong Zhang   PetscReal         ferr,berr;
338dff31646SBarry Smith   static PetscBool  cite = PETSC_FALSE;
33914b396bbSKris Buschelman 
34014b396bbSKris Buschelman   PetscFunctionBegin;
3412205254eSKarl Rupp   if (lu->lwork == -1) PetscFunctionReturn(0);
342dff31646SBarry Smith   ierr = PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",&cite);CHKERRQ(ierr);
343cae5a23dSHong Zhang 
344077aedafSJed Brown   ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
34575af56d4SHong Zhang   lu->B.ncol = 1;   /* Set the number of right-hand side */
346cae5a23dSHong Zhang   if (lu->options.Equil && !lu->rhs_dup) {
347cae5a23dSHong Zhang     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
348785e854fSJed Brown     ierr = PetscMalloc1(n,&lu->rhs_dup);CHKERRQ(ierr);
349cae5a23dSHong Zhang   }
350cae5a23dSHong Zhang   if (lu->options.Equil) {
351cae5a23dSHong Zhang     /* Copy b into rsh_dup */
352d9ca1df4SBarry Smith     ierr   = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
353cae5a23dSHong Zhang     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
354d9ca1df4SBarry Smith     ierr   = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
355cae5a23dSHong Zhang     barray = lu->rhs_dup;
356cae5a23dSHong Zhang   } else {
357d9ca1df4SBarry Smith     ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
358cae5a23dSHong Zhang   }
35975af56d4SHong Zhang   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3605fe6150dSHong Zhang 
3615fe6150dSHong Zhang #if defined(PETSC_USE_COMPLEX)
3623cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
3633cb270beSHong Zhang   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
3643cb270beSHong Zhang   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
3653cb270beSHong Zhang #else
3665fe6150dSHong Zhang   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
3675fe6150dSHong Zhang   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
3683cb270beSHong Zhang #endif
3695fe6150dSHong Zhang #else
370d9ca1df4SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (void*)barray;
37175af56d4SHong Zhang   ((DNformat*)lu->X.Store)->nzval = xarray;
3725fe6150dSHong Zhang #endif
37375af56d4SHong Zhang 
37475af56d4SHong Zhang   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
375d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_LU) {
3768914a3f7SHong Zhang #if defined(PETSC_USE_COMPLEX)
3773cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
378d387c056SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3793cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
3805db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
3813cb270beSHong Zhang #else
382d387c056SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3838914a3f7SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
3845db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
3853cb270beSHong Zhang #endif
3863cb270beSHong Zhang #else
3873cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
388d387c056SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3893cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
3905db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
3918914a3f7SHong Zhang #else
392d387c056SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
39375af56d4SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
394c147c726SHong Zhang                                      &lu->Glu,&lu->mem_usage, &lu->stat, &info));
3958914a3f7SHong Zhang #endif
3963cb270beSHong Zhang #endif
397d5f3da31SBarry Smith   } else if (A->factortype == MAT_FACTOR_ILU) {
398cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
3993cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
400d387c056SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
4013cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
4025db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
4033cb270beSHong Zhang #else
404d387c056SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
405cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
4065db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
4073cb270beSHong Zhang #endif
4083cb270beSHong Zhang #else
4093cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
410d387c056SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
4113cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
4125db81dd2SSatish Balay                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
413cffbb591SHong Zhang #else
414d387c056SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
415cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
416c147c726SHong Zhang                                      &lu->Glu, &lu->mem_usage, &lu->stat, &info));
417cffbb591SHong Zhang #endif
4183cb270beSHong Zhang #endif
419f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
420cae5a23dSHong Zhang   if (!lu->options.Equil) {
421d9ca1df4SBarry Smith     ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
422cae5a23dSHong Zhang   }
42375af56d4SHong Zhang   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
42475af56d4SHong Zhang 
425958c9bccSBarry Smith   if (!info || info == lu->A.ncol+1) {
42675af56d4SHong Zhang     if (lu->options.IterRefine) {
4278914a3f7SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
4288914a3f7SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
4292205254eSKarl Rupp       for (i = 0; i < 1; ++i) {
4305d8b2efaSHong Zhang         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
43175af56d4SHong Zhang       }
4322205254eSKarl Rupp     }
4338914a3f7SHong Zhang   } else if (info > 0) {
4348914a3f7SHong Zhang     if (lu->lwork == -1) {
43577431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
4368914a3f7SHong Zhang     } else {
43777431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
4388914a3f7SHong Zhang     }
439f23aa3ddSBarry Smith   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
44014b396bbSKris Buschelman 
4418914a3f7SHong Zhang   if (lu->options.PrintStat) {
4428914a3f7SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
443d387c056SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
4448914a3f7SHong Zhang   }
44575af56d4SHong Zhang   PetscFunctionReturn(0);
44675af56d4SHong Zhang }
44714b396bbSKris Buschelman 
44814b396bbSKris Buschelman #undef __FUNCT__
449c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU"
450c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
451c29e884eSHong Zhang {
452c29e884eSHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
453c29e884eSHong Zhang   PetscErrorCode ierr;
454c29e884eSHong Zhang 
455c29e884eSHong Zhang   PetscFunctionBegin;
4569ad64342SHong Zhang   if (A->errortype) {
4579ad64342SHong Zhang     ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr);
4589ad64342SHong Zhang     ierr = VecSetInf(x);CHKERRQ(ierr);
4599ad64342SHong Zhang     PetscFunctionReturn(0);
4609ad64342SHong Zhang   }
4612205254eSKarl Rupp 
4629ad64342SHong Zhang   lu->options.Trans = TRANS;
463c29e884eSHong Zhang   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
464c29e884eSHong Zhang   PetscFunctionReturn(0);
465c29e884eSHong Zhang }
466c29e884eSHong Zhang 
467c29e884eSHong Zhang #undef __FUNCT__
468c7c1fe80SHong Zhang #define __FUNCT__ "MatSolveTranspose_SuperLU"
469c7c1fe80SHong Zhang PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
470c7c1fe80SHong Zhang {
471c7c1fe80SHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
472c7c1fe80SHong Zhang   PetscErrorCode ierr;
473c7c1fe80SHong Zhang 
474c7c1fe80SHong Zhang   PetscFunctionBegin;
4759ad64342SHong Zhang   if (A->errortype) {
4769ad64342SHong Zhang     ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr);
4779ad64342SHong Zhang     ierr = VecSetInf(x);CHKERRQ(ierr);
4789ad64342SHong Zhang     PetscFunctionReturn(0);
4799ad64342SHong Zhang   }
4802205254eSKarl Rupp 
4819ad64342SHong Zhang   lu->options.Trans = NOTRANS;
482c29e884eSHong Zhang   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
483c7c1fe80SHong Zhang   PetscFunctionReturn(0);
484c7c1fe80SHong Zhang }
485c7c1fe80SHong Zhang 
486e0b74bf9SHong Zhang #undef __FUNCT__
487e0b74bf9SHong Zhang #define __FUNCT__ "MatMatSolve_SuperLU"
488e0b74bf9SHong Zhang PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
489e0b74bf9SHong Zhang {
490e0b74bf9SHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
491cd723cd1SBarry Smith   PetscBool      flg;
492cd723cd1SBarry Smith   PetscErrorCode ierr;
493e0b74bf9SHong Zhang 
494e0b74bf9SHong Zhang   PetscFunctionBegin;
4950298fd71SBarry Smith   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
496ce94432eSBarry Smith   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
4970298fd71SBarry Smith   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
498ce94432eSBarry Smith   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
4992205254eSKarl Rupp   lu->options.Trans = TRANS;
500e0b74bf9SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
501e0b74bf9SHong Zhang   PetscFunctionReturn(0);
502e0b74bf9SHong Zhang }
503e0b74bf9SHong Zhang 
50414b396bbSKris Buschelman /*
50514b396bbSKris Buschelman    Note the r permutation is ignored
50614b396bbSKris Buschelman */
50714b396bbSKris Buschelman #undef __FUNCT__
508f0c56d0fSKris Buschelman #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
5099a625307SHong Zhang PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
51014b396bbSKris Buschelman {
5111d5ca7f3SHong Zhang   Mat_SuperLU *lu = (Mat_SuperLU*)(F->spptr);
512b24902e0SBarry Smith 
513b24902e0SBarry Smith   PetscFunctionBegin;
514b24902e0SBarry Smith   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
515b24902e0SBarry Smith   lu->CleanUpSuperLU      = PETSC_TRUE;
5161d5ca7f3SHong Zhang   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
517b24902e0SBarry Smith   PetscFunctionReturn(0);
518b24902e0SBarry Smith }
519b24902e0SBarry Smith 
52035bd34faSBarry Smith #undef __FUNCT__
5215ccb76cbSHong Zhang #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
522b2573a8aSBarry Smith static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
5235ccb76cbSHong Zhang {
5245ccb76cbSHong Zhang   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
5255ccb76cbSHong Zhang 
5265ccb76cbSHong Zhang   PetscFunctionBegin;
5275ccb76cbSHong Zhang   lu->options.ILU_DropTol = dtol;
5285ccb76cbSHong Zhang   PetscFunctionReturn(0);
5295ccb76cbSHong Zhang }
5305ccb76cbSHong Zhang 
5315ccb76cbSHong Zhang #undef __FUNCT__
5325ccb76cbSHong Zhang #define __FUNCT__ "MatSuperluSetILUDropTol"
5335ccb76cbSHong Zhang /*@
5345ccb76cbSHong Zhang   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
5355ccb76cbSHong Zhang    Logically Collective on Mat
5365ccb76cbSHong Zhang 
5375ccb76cbSHong Zhang    Input Parameters:
5385ccb76cbSHong Zhang +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
5395ccb76cbSHong Zhang -  dtol - drop tolerance
5405ccb76cbSHong Zhang 
5415ccb76cbSHong Zhang   Options Database:
5425ccb76cbSHong Zhang .   -mat_superlu_ilu_droptol <dtol>
5435ccb76cbSHong Zhang 
5445ccb76cbSHong Zhang    Level: beginner
5455ccb76cbSHong Zhang 
54696a0c994SBarry Smith    References:
54796a0c994SBarry Smith .      SuperLU Users' Guide
5485ccb76cbSHong Zhang 
5495ccb76cbSHong Zhang .seealso: MatGetFactor()
5505ccb76cbSHong Zhang @*/
5515ccb76cbSHong Zhang PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
5525ccb76cbSHong Zhang {
5535ccb76cbSHong Zhang   PetscErrorCode ierr;
5545ccb76cbSHong Zhang 
5555ccb76cbSHong Zhang   PetscFunctionBegin;
5565ccb76cbSHong Zhang   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
55769fbec6eSBarry Smith   PetscValidLogicalCollectiveReal(F,dtol,2);
5585ccb76cbSHong Zhang   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
5595ccb76cbSHong Zhang   PetscFunctionReturn(0);
5605ccb76cbSHong Zhang }
5615ccb76cbSHong Zhang 
5625ccb76cbSHong Zhang #undef __FUNCT__
56335bd34faSBarry Smith #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
56435bd34faSBarry Smith PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
56535bd34faSBarry Smith {
56635bd34faSBarry Smith   PetscFunctionBegin;
5672692d6eeSBarry Smith   *type = MATSOLVERSUPERLU;
56835bd34faSBarry Smith   PetscFunctionReturn(0);
56935bd34faSBarry Smith }
57035bd34faSBarry Smith 
571b24902e0SBarry Smith 
572b24902e0SBarry Smith /*MC
573ba992d64SSatish Balay   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
574b24902e0SBarry Smith   via the external package SuperLU.
575b24902e0SBarry Smith 
576e2e64c6bSBarry Smith   Use ./configure --download-superlu to have PETSc installed with SuperLU
577b24902e0SBarry Smith 
578c2b89b5dSBarry Smith   Use -pc_type lu -pc_factor_mat_solver_package superlu to us this direct solver
579c2b89b5dSBarry Smith 
580b24902e0SBarry Smith   Options Database Keys:
581e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE>            - Equil (None)
582e08999f5SMatthew G Knepley . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
583e08999f5SMatthew G Knepley . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
584e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
585e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
586e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
587e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
588e08999f5SMatthew G Knepley . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
589e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
590e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE>        - PrintStat (None)
591e08999f5SMatthew G Knepley . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
592e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
593e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
594e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
595e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
596e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
597e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
598b24902e0SBarry Smith 
5992692d6eeSBarry Smith    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
6005c9eb25fSBarry Smith 
601b24902e0SBarry Smith    Level: beginner
602b24902e0SBarry Smith 
603d45987f3SHong Zhang .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
604b24902e0SBarry Smith M*/
605b24902e0SBarry Smith 
606b24902e0SBarry Smith #undef __FUNCT__
607b24902e0SBarry Smith #define __FUNCT__ "MatGetFactor_seqaij_superlu"
608db87b0f2SBarry Smith static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
609b24902e0SBarry Smith {
61014b396bbSKris Buschelman   Mat            B;
611f0c56d0fSKris Buschelman   Mat_SuperLU    *lu;
6126849ba73SBarry Smith   PetscErrorCode ierr;
6135d8b2efaSHong Zhang   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
6148afaa268SBarry Smith   PetscBool      flg,set;
6153cb270beSHong Zhang   PetscReal      real_input;
6165ca28756SHong Zhang   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
6175ca28756SHong Zhang   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
6185ca28756SHong Zhang   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
61914b396bbSKris Buschelman 
62014b396bbSKris Buschelman   PetscFunctionBegin;
621ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
622d0f46423SBarry Smith   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
6237adad957SLisandro Dalcin   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
6240298fd71SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
625f0c56d0fSKris Buschelman 
626cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
627b24902e0SBarry Smith     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
628cffbb591SHong Zhang     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
629b3a44c85SBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
630cffbb591SHong Zhang 
631*00c67f3bSHong Zhang   ierr = PetscFree(B->solvertype);CHKERRQ(ierr);
632*00c67f3bSHong Zhang   ierr = PetscStrallocpy(MATSOLVERSUPERLU,&B->solvertype);CHKERRQ(ierr);
633*00c67f3bSHong Zhang 
634b24902e0SBarry Smith   B->ops->destroy     = MatDestroy_SuperLU;
6353519fcdcSHong Zhang   B->ops->view        = MatView_SuperLU;
63620be8e61SHong Zhang   B->ops->getdiagonal = MatGetDiagonal_SuperLU;
637d5f3da31SBarry Smith   B->factortype       = ftype;
63894b7f48cSBarry Smith   B->assembled        = PETSC_TRUE;           /* required by -ksp_view */
6395c9eb25fSBarry Smith   B->preallocated     = PETSC_TRUE;
64014b396bbSKris Buschelman 
641b00a9115SJed Brown   ierr = PetscNewLog(B,&lu);CHKERRQ(ierr);
642cae5a23dSHong Zhang 
643cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU) {
6449ce50919SHong Zhang     set_default_options(&lu->options);
6453d6581fbSHong Zhang     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
6463d6581fbSHong Zhang       "Whether or not the system will be equilibrated depends on the
6473d6581fbSHong Zhang        scaling of the matrix A, but if equilibration is used, A is
6483d6581fbSHong Zhang        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
6493d6581fbSHong Zhang        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
6503d6581fbSHong Zhang      We set 'options.Equil = NO' as default because additional space is needed for it.
6513d6581fbSHong Zhang     */
6523d6581fbSHong Zhang     lu->options.Equil = NO;
653cffbb591SHong Zhang   } else if (ftype == MAT_FACTOR_ILU) {
6540c74a584SJed Brown     /* Set the default input options of ilu: */
655d387c056SBarry Smith     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
656cffbb591SHong Zhang   }
6579ce50919SHong Zhang   lu->options.PrintStat = NO;
6581a2e2f44SHong Zhang 
6595d8b2efaSHong Zhang   /* Initialize the statistics variables. */
660d387c056SBarry Smith   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
6618914a3f7SHong Zhang   lu->lwork = 0;   /* allocate space internally by system malloc */
6629ce50919SHong Zhang 
663ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
6648afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,NULL);CHKERRQ(ierr);
6658914a3f7SHong Zhang   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
6662205254eSKarl Rupp   if (flg) lu->options.ColPerm = (colperm_t)indx;
6678914a3f7SHong Zhang   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
6682205254eSKarl Rupp   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
6698afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,&set);CHKERRQ(ierr);
6708afaa268SBarry Smith   if (set && flg) lu->options.SymmetricMode = YES;
6713cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
6723cb270beSHong Zhang   if (flg) lu->options.DiagPivotThresh = (double) real_input;
6738afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,&set);CHKERRQ(ierr);
6748afaa268SBarry Smith   if (set && flg) lu->options.PivotGrowth = YES;
6758afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,&set);CHKERRQ(ierr);
6768afaa268SBarry Smith   if (set && flg) lu->options.ConditionNumber = YES;
677d7ebd59bSHong Zhang   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
6782205254eSKarl Rupp   if (flg) lu->options.RowPerm = (rowperm_t)indx;
6798afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,&set);CHKERRQ(ierr);
6808afaa268SBarry Smith   if (set && flg) lu->options.ReplaceTinyPivot = YES;
6818afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,&set);CHKERRQ(ierr);
6828afaa268SBarry Smith   if (set && flg) lu->options.PrintStat = YES;
6830298fd71SBarry Smith   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
6845fe6150dSHong Zhang   if (lu->lwork > 0) {
685d87de817SBarry Smith     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
6865fe6150dSHong Zhang     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
6875fe6150dSHong Zhang   } else if (lu->lwork != 0 && lu->lwork != -1) {
68877431f27SBarry Smith     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
6898914a3f7SHong Zhang     lu->lwork = 0;
6908914a3f7SHong Zhang   }
691cffbb591SHong Zhang   /* ilu options */
6923cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
6933cb270beSHong Zhang   if (flg) lu->options.ILU_DropTol = (double) real_input;
6943cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
6953cb270beSHong Zhang   if (flg) lu->options.ILU_FillTol = (double) real_input;
6963cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
6973cb270beSHong Zhang   if (flg) lu->options.ILU_FillFactor = (double) real_input;
6980298fd71SBarry Smith   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
699cffbb591SHong Zhang   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
7002205254eSKarl Rupp   if (flg) lu->options.ILU_Norm = (norm_t)indx;
701cffbb591SHong Zhang   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
7022205254eSKarl Rupp   if (flg) lu->options.ILU_MILU = (milu_t)indx;
7039ce50919SHong Zhang   PetscOptionsEnd();
70438abfdaeSHong Zhang   if (lu->options.Equil == YES) {
70538abfdaeSHong Zhang     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
70638abfdaeSHong Zhang     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
70738abfdaeSHong Zhang   }
7089ce50919SHong Zhang 
7095d8b2efaSHong Zhang   /* Allocate spaces (notice sizes are for the transpose) */
710785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->etree);CHKERRQ(ierr);
711785e854fSJed Brown   ierr = PetscMalloc1(n,&lu->perm_r);CHKERRQ(ierr);
712785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->perm_c);CHKERRQ(ierr);
713785e854fSJed Brown   ierr = PetscMalloc1(n,&lu->R);CHKERRQ(ierr);
714785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->C);CHKERRQ(ierr);
7155d8b2efaSHong Zhang 
7165d8b2efaSHong Zhang   /* create rhs and solution x without allocate space for .Store */
7175d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX)
7183cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
719d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
720d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
7213cb270beSHong Zhang #else
722d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
723d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
7243cb270beSHong Zhang #endif
7253cb270beSHong Zhang #else
7263cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
727d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
728d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
7295d8b2efaSHong Zhang #else
730d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
731d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
7325d8b2efaSHong Zhang #endif
7333cb270beSHong Zhang #endif
7345d8b2efaSHong Zhang 
735bdf89e91SBarry Smith   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
736bdf89e91SBarry Smith   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
7375c9eb25fSBarry Smith   B->spptr = lu;
73820be8e61SHong Zhang 
739899d7b4fSKris Buschelman   *F       = B;
74014b396bbSKris Buschelman   PetscFunctionReturn(0);
74114b396bbSKris Buschelman }
742d954db57SHong Zhang 
74342c9c57cSBarry Smith #undef __FUNCT__
74442c9c57cSBarry Smith #define __FUNCT__ "MatSolverPackageRegister_SuperLU"
74529b38603SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_SuperLU(void)
74642c9c57cSBarry Smith {
74742c9c57cSBarry Smith   PetscErrorCode ierr;
74842c9c57cSBarry Smith 
74942c9c57cSBarry Smith   PetscFunctionBegin;
75042c9c57cSBarry Smith   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_LU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
75142c9c57cSBarry Smith   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_ILU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
75242c9c57cSBarry Smith   PetscFunctionReturn(0);
75342c9c57cSBarry Smith }
754