xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision d9ca1df42eda25bc875149d6d493aad6e204c9ff)
114b396bbSKris Buschelman 
25a46d3faSBarry Smith /*  --------------------------------------------------------------------
35a46d3faSBarry Smith 
45a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
55d8b2efaSHong Zhang      the SuperLU sparse solver. You can use this as a starting point for
65a46d3faSBarry Smith      implementing your own subclass of a PETSc matrix class.
75a46d3faSBarry Smith 
85a46d3faSBarry Smith      This demonstrates a way to make an implementation inheritence of a PETSc
95a46d3faSBarry Smith      matrix type. This means constructing a new matrix type (SuperLU) by changing some
105a46d3faSBarry Smith      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
115a46d3faSBarry Smith      additional method. (See any book on object oriented programming).
1214b396bbSKris Buschelman */
1314b396bbSKris Buschelman 
145a46d3faSBarry Smith /*
155a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
165a46d3faSBarry Smith */
17c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
1814b396bbSKris Buschelman 
195a46d3faSBarry Smith /*
205a46d3faSBarry Smith      SuperLU include files
215a46d3faSBarry Smith */
2214b396bbSKris Buschelman EXTERN_C_BEGIN
2314b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
243cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
253cb270beSHong Zhang #include <slu_cdefs.h>
263cb270beSHong Zhang #else
27c6db04a5SJed Brown #include <slu_zdefs.h>
283cb270beSHong Zhang #endif
293cb270beSHong Zhang #else
303cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
313cb270beSHong Zhang #include <slu_sdefs.h>
3214b396bbSKris Buschelman #else
33c6db04a5SJed Brown #include <slu_ddefs.h>
3414b396bbSKris Buschelman #endif
353cb270beSHong Zhang #endif
36c6db04a5SJed Brown #include <slu_util.h>
3714b396bbSKris Buschelman EXTERN_C_END
3814b396bbSKris Buschelman 
395a46d3faSBarry Smith /*
405a46d3faSBarry Smith      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
415a46d3faSBarry Smith */
4214b396bbSKris Buschelman typedef struct {
4375af56d4SHong Zhang   SuperMatrix       A,L,U,B,X;
4475af56d4SHong Zhang   superlu_options_t options;
45da7a1d00SHong Zhang   PetscInt          *perm_c; /* column permutation vector */
46da7a1d00SHong Zhang   PetscInt          *perm_r; /* row permutations from partial pivoting */
47da7a1d00SHong Zhang   PetscInt          *etree;
48da7a1d00SHong Zhang   PetscReal         *R, *C;
4975af56d4SHong Zhang   char              equed[1];
50da7a1d00SHong Zhang   PetscInt          lwork;
5175af56d4SHong Zhang   void              *work;
52da7a1d00SHong Zhang   PetscReal         rpg, rcond;
5375af56d4SHong Zhang   mem_usage_t       mem_usage;
5475af56d4SHong Zhang   MatStructure      flg;
555d8b2efaSHong Zhang   SuperLUStat_t     stat;
56cae5a23dSHong Zhang   Mat               A_dup;
57cae5a23dSHong Zhang   PetscScalar       *rhs_dup;
5814b396bbSKris Buschelman 
5914b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
60ace3abfcSBarry Smith   PetscBool CleanUpSuperLU;
61f0c56d0fSKris Buschelman } Mat_SuperLU;
6214b396bbSKris Buschelman 
63e0e586b9SHong Zhang extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
640481f469SBarry Smith extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo*);
65e0e586b9SHong Zhang extern PetscErrorCode MatDestroy_SuperLU(Mat);
66e0e586b9SHong Zhang extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
67e0e586b9SHong Zhang extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
68e0e586b9SHong Zhang extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
69e0b74bf9SHong Zhang extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
70e0e586b9SHong Zhang extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
710481f469SBarry Smith extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
72e0e586b9SHong Zhang extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat*);
73e0e586b9SHong Zhang 
745a46d3faSBarry Smith /*
755a46d3faSBarry Smith     Utility function
765a46d3faSBarry Smith */
775a46d3faSBarry Smith #undef __FUNCT__
785a46d3faSBarry Smith #define __FUNCT__ "MatFactorInfo_SuperLU"
795a46d3faSBarry Smith PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
805a46d3faSBarry Smith {
815a46d3faSBarry Smith   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
825a46d3faSBarry Smith   PetscErrorCode    ierr;
835a46d3faSBarry Smith   superlu_options_t options;
845a46d3faSBarry Smith 
855a46d3faSBarry Smith   PetscFunctionBegin;
865a46d3faSBarry Smith   /* check if matrix is superlu_dist type */
875a46d3faSBarry Smith   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
885a46d3faSBarry Smith 
895a46d3faSBarry Smith   options = lu->options;
902205254eSKarl Rupp 
915a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
925a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
935a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
945a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
955a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
965a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
975a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
985a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
995a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
1005a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
1015a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
1025a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
103d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_ILU) {
104cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
105cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
106cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
107cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
108cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
109cffbb591SHong Zhang     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
110cffbb591SHong Zhang   }
1115a46d3faSBarry Smith   PetscFunctionReturn(0);
1125a46d3faSBarry Smith }
1135a46d3faSBarry Smith 
1145a46d3faSBarry Smith /*
1155a46d3faSBarry Smith     These are the methods provided to REPLACE the corresponding methods of the
1165a46d3faSBarry Smith    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
1175a46d3faSBarry Smith */
1185a46d3faSBarry Smith #undef __FUNCT__
1195a46d3faSBarry Smith #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
1200481f469SBarry Smith PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
1215a46d3faSBarry Smith {
1221d5ca7f3SHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
123cae5a23dSHong Zhang   Mat_SeqAIJ     *aa;
1245a46d3faSBarry Smith   PetscErrorCode ierr;
1255a46d3faSBarry Smith   PetscInt       sinfo;
1265a46d3faSBarry Smith   PetscReal      ferr, berr;
1275a46d3faSBarry Smith   NCformat       *Ustore;
1285a46d3faSBarry Smith   SCformat       *Lstore;
1295a46d3faSBarry Smith 
1305a46d3faSBarry Smith   PetscFunctionBegin;
1315a46d3faSBarry Smith   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
1325a46d3faSBarry Smith     lu->options.Fact = SamePattern;
1335a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
1345a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
135cae5a23dSHong Zhang     if (lu->options.Equil) {
136cae5a23dSHong Zhang       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
137cae5a23dSHong Zhang     }
1385a46d3faSBarry Smith     if (lu->lwork >= 0) {
139d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
140d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
1415a46d3faSBarry Smith       lu->options.Fact = SamePattern;
1425a46d3faSBarry Smith     }
1435a46d3faSBarry Smith   }
1445a46d3faSBarry Smith 
1455a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
1465a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
1475a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
148cae5a23dSHong Zhang   if (lu->options.Equil) {
149cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
150cae5a23dSHong Zhang   } else {
151cae5a23dSHong Zhang     aa = (Mat_SeqAIJ*)(A)->data;
152cae5a23dSHong Zhang   }
1535a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
1543cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
155d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
1563cb270beSHong Zhang #else
157d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
1583cb270beSHong Zhang #endif
1593cb270beSHong Zhang #else
1603cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
161d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
1625a46d3faSBarry Smith #else
163d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
1645a46d3faSBarry Smith #endif
1653cb270beSHong Zhang #endif
1665a46d3faSBarry Smith 
1675a46d3faSBarry Smith   /* Numerical factorization */
1685a46d3faSBarry Smith   lu->B.ncol = 0;  /* Indicate not to solve the system */
169d5f3da31SBarry Smith   if (F->factortype == MAT_FACTOR_LU) {
1705a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
1713cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
172d387c056SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1733cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
174d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &sinfo));
1753cb270beSHong Zhang #else
176d387c056SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1775a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
178d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &sinfo));
1793cb270beSHong Zhang #endif
1803cb270beSHong Zhang #else
1813cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
182d387c056SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1833cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
184d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &sinfo));
1855a46d3faSBarry Smith #else
186d387c056SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1875a46d3faSBarry Smith                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
188d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &sinfo));
1895a46d3faSBarry Smith #endif
1903cb270beSHong Zhang #endif
191d5f3da31SBarry Smith   } else if (F->factortype == MAT_FACTOR_ILU) {
192cffbb591SHong Zhang     /* Compute the incomplete factorization, condition number and pivot growth */
193cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
1943cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
195d387c056SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
1963cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
197d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &sinfo));
1983cb270beSHong Zhang #else
199d387c056SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
200cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
201d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &sinfo));
2023cb270beSHong Zhang #endif
2033cb270beSHong Zhang #else
2043cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
205d387c056SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
2063cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
207d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &sinfo));
208cffbb591SHong Zhang #else
209d387c056SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
210cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
211d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &sinfo));
212cffbb591SHong Zhang #endif
2133cb270beSHong Zhang #endif
214f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
2155a46d3faSBarry Smith   if (!sinfo || sinfo == lu->A.ncol+1) {
2162205254eSKarl Rupp     if (lu->options.PivotGrowth) {
2175a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
2182205254eSKarl Rupp     }
2192205254eSKarl Rupp     if (lu->options.ConditionNumber) {
2205a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
2212205254eSKarl Rupp     }
2225a46d3faSBarry Smith   } else if (sinfo > 0) {
2235a46d3faSBarry Smith     if (lu->lwork == -1) {
2245a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
2259308c137SBarry Smith     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
226f23aa3ddSBarry Smith   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
2275a46d3faSBarry Smith 
2285a46d3faSBarry Smith   if (lu->options.PrintStat) {
2295a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
230d387c056SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
2315a46d3faSBarry Smith     Lstore = (SCformat*) lu->L.Store;
2325a46d3faSBarry Smith     Ustore = (NCformat*) lu->U.Store;
2335a46d3faSBarry Smith     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
2345a46d3faSBarry Smith     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
2355a46d3faSBarry Smith     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
2366da386baSHong Zhang     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
2376da386baSHong Zhang                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
2385a46d3faSBarry Smith   }
2395a46d3faSBarry Smith 
2405a46d3faSBarry Smith   lu->flg                = SAME_NONZERO_PATTERN;
2411d5ca7f3SHong Zhang   F->ops->solve          = MatSolve_SuperLU;
2421d5ca7f3SHong Zhang   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
243e0b74bf9SHong Zhang   F->ops->matsolve       = MatMatSolve_SuperLU;
2445a46d3faSBarry Smith   PetscFunctionReturn(0);
2455a46d3faSBarry Smith }
2465a46d3faSBarry Smith 
24714b396bbSKris Buschelman #undef __FUNCT__
24820be8e61SHong Zhang #define __FUNCT__ "MatGetDiagonal_SuperLU"
24920be8e61SHong Zhang PetscErrorCode MatGetDiagonal_SuperLU(Mat A,Vec v)
25020be8e61SHong Zhang {
25120be8e61SHong Zhang   PetscFunctionBegin;
25220be8e61SHong Zhang   SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Mat type: SuperLU factor");
25320be8e61SHong Zhang   PetscFunctionReturn(0);
25420be8e61SHong Zhang }
25520be8e61SHong Zhang 
25620be8e61SHong Zhang #undef __FUNCT__
257f0c56d0fSKris Buschelman #define __FUNCT__ "MatDestroy_SuperLU"
258dfbe8321SBarry Smith PetscErrorCode MatDestroy_SuperLU(Mat A)
25914b396bbSKris Buschelman {
260dfbe8321SBarry Smith   PetscErrorCode ierr;
261f0c56d0fSKris Buschelman   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
26214b396bbSKris Buschelman 
26314b396bbSKris Buschelman   PetscFunctionBegin;
264bf0cc555SLisandro Dalcin   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
265d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
266d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
267d387c056SBarry Smith     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
268d387c056SBarry Smith     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
2690e742b69SHong Zhang     if (lu->lwork >= 0) {
270d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
271d387c056SBarry Smith       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
2720e742b69SHong Zhang     }
2730e742b69SHong Zhang   }
274bf0cc555SLisandro Dalcin   if (lu) {
2759ce50919SHong Zhang     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
2769ce50919SHong Zhang     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
2779ce50919SHong Zhang     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
2789ce50919SHong Zhang     ierr = PetscFree(lu->R);CHKERRQ(ierr);
2799ce50919SHong Zhang     ierr = PetscFree(lu->C);CHKERRQ(ierr);
280bf0cc555SLisandro Dalcin     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
281bf0cc555SLisandro Dalcin     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
282bf0cc555SLisandro Dalcin   }
283bf0cc555SLisandro Dalcin   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
2840e742b69SHong Zhang 
285d954db57SHong Zhang   /* clear composed functions */
286bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr);
287bdf89e91SBarry Smith   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr);
288d954db57SHong Zhang 
289b24902e0SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
29014b396bbSKris Buschelman   PetscFunctionReturn(0);
29114b396bbSKris Buschelman }
29214b396bbSKris Buschelman 
29314b396bbSKris Buschelman #undef __FUNCT__
294f0c56d0fSKris Buschelman #define __FUNCT__ "MatView_SuperLU"
295dfbe8321SBarry Smith PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
29614b396bbSKris Buschelman {
297dfbe8321SBarry Smith   PetscErrorCode    ierr;
298ace3abfcSBarry Smith   PetscBool         iascii;
29914b396bbSKris Buschelman   PetscViewerFormat format;
30014b396bbSKris Buschelman 
30114b396bbSKris Buschelman   PetscFunctionBegin;
302251f4c67SDmitry Karpeev   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
30332077d6dSBarry Smith   if (iascii) {
30414b396bbSKris Buschelman     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
3052f59403fSHong Zhang     if (format == PETSC_VIEWER_ASCII_INFO) {
306f0c56d0fSKris Buschelman       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
30714b396bbSKris Buschelman     }
30814b396bbSKris Buschelman   }
30914b396bbSKris Buschelman   PetscFunctionReturn(0);
31014b396bbSKris Buschelman }
31114b396bbSKris Buschelman 
31214b396bbSKris Buschelman 
31314b396bbSKris Buschelman #undef __FUNCT__
314c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU_Private"
315c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
31614b396bbSKris Buschelman {
317f0c56d0fSKris Buschelman   Mat_SuperLU       *lu = (Mat_SuperLU*)A->spptr;
318*d9ca1df4SBarry Smith   const PetscScalar *barray;
319*d9ca1df4SBarry Smith   PetscScalar       *xarray;
320dfbe8321SBarry Smith   PetscErrorCode    ierr;
321077aedafSJed Brown   PetscInt          info,i,n;
322da7a1d00SHong Zhang   PetscReal         ferr,berr;
323dff31646SBarry Smith   static PetscBool  cite = PETSC_FALSE;
32414b396bbSKris Buschelman 
32514b396bbSKris Buschelman   PetscFunctionBegin;
3262205254eSKarl Rupp   if (lu->lwork == -1) PetscFunctionReturn(0);
327dff31646SBarry Smith   ierr = PetscCitationsRegister("@article{superlu99,\n  author  = {James W. Demmel and Stanley C. Eisenstat and\n             John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n  title = {A supernodal approach to sparse partial pivoting},\n  journal = {SIAM J. Matrix Analysis and Applications},\n  year = {1999},\n  volume  = {20},\n  number = {3},\n  pages = {720-755}\n}\n",&cite);CHKERRQ(ierr);
328cae5a23dSHong Zhang 
329077aedafSJed Brown   ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr);
33075af56d4SHong Zhang   lu->B.ncol = 1;   /* Set the number of right-hand side */
331cae5a23dSHong Zhang   if (lu->options.Equil && !lu->rhs_dup) {
332cae5a23dSHong Zhang     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
333785e854fSJed Brown     ierr = PetscMalloc1(n,&lu->rhs_dup);CHKERRQ(ierr);
334cae5a23dSHong Zhang   }
335cae5a23dSHong Zhang   if (lu->options.Equil) {
336cae5a23dSHong Zhang     /* Copy b into rsh_dup */
337*d9ca1df4SBarry Smith     ierr   = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
338cae5a23dSHong Zhang     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
339*d9ca1df4SBarry Smith     ierr   = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
340cae5a23dSHong Zhang     barray = lu->rhs_dup;
341cae5a23dSHong Zhang   } else {
342*d9ca1df4SBarry Smith     ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr);
343cae5a23dSHong Zhang   }
34475af56d4SHong Zhang   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
3455fe6150dSHong Zhang 
3465fe6150dSHong Zhang #if defined(PETSC_USE_COMPLEX)
3473cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
3483cb270beSHong Zhang   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
3493cb270beSHong Zhang   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
3503cb270beSHong Zhang #else
3515fe6150dSHong Zhang   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
3525fe6150dSHong Zhang   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
3533cb270beSHong Zhang #endif
3545fe6150dSHong Zhang #else
355*d9ca1df4SBarry Smith   ((DNformat*)lu->B.Store)->nzval = (void*)barray;
35675af56d4SHong Zhang   ((DNformat*)lu->X.Store)->nzval = xarray;
3575fe6150dSHong Zhang #endif
35875af56d4SHong Zhang 
35975af56d4SHong Zhang   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
360d5f3da31SBarry Smith   if (A->factortype == MAT_FACTOR_LU) {
3618914a3f7SHong Zhang #if defined(PETSC_USE_COMPLEX)
3623cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
363d387c056SBarry Smith     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3643cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
365d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &info));
3663cb270beSHong Zhang #else
367d387c056SBarry Smith     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3688914a3f7SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
369d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &info));
3703cb270beSHong Zhang #endif
3713cb270beSHong Zhang #else
3723cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
373d387c056SBarry Smith     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3743cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
375d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &info));
3768914a3f7SHong Zhang #else
377d387c056SBarry Smith     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
37875af56d4SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
379d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &info));
3808914a3f7SHong Zhang #endif
3813cb270beSHong Zhang #endif
382d5f3da31SBarry Smith   } else if (A->factortype == MAT_FACTOR_ILU) {
383cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX)
3843cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
385d387c056SBarry Smith     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3863cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
387d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &info));
3883cb270beSHong Zhang #else
389d387c056SBarry Smith     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
390cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
391d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &info));
3923cb270beSHong Zhang #endif
3933cb270beSHong Zhang #else
3943cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
395d387c056SBarry Smith     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
3963cb270beSHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
397d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &info));
398cffbb591SHong Zhang #else
399d387c056SBarry Smith     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
400cffbb591SHong Zhang                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
401d387c056SBarry Smith                                      &lu->mem_usage, &lu->stat, &info));
402cffbb591SHong Zhang #endif
4033cb270beSHong Zhang #endif
404f23aa3ddSBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
405cae5a23dSHong Zhang   if (!lu->options.Equil) {
406*d9ca1df4SBarry Smith     ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr);
407cae5a23dSHong Zhang   }
40875af56d4SHong Zhang   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
40975af56d4SHong Zhang 
410958c9bccSBarry Smith   if (!info || info == lu->A.ncol+1) {
41175af56d4SHong Zhang     if (lu->options.IterRefine) {
4128914a3f7SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
4138914a3f7SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
4142205254eSKarl Rupp       for (i = 0; i < 1; ++i) {
4155d8b2efaSHong Zhang         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
41675af56d4SHong Zhang       }
4172205254eSKarl Rupp     }
4188914a3f7SHong Zhang   } else if (info > 0) {
4198914a3f7SHong Zhang     if (lu->lwork == -1) {
42077431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
4218914a3f7SHong Zhang     } else {
42277431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
4238914a3f7SHong Zhang     }
424f23aa3ddSBarry Smith   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
42514b396bbSKris Buschelman 
4268914a3f7SHong Zhang   if (lu->options.PrintStat) {
4278914a3f7SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
428d387c056SBarry Smith     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
4298914a3f7SHong Zhang   }
43075af56d4SHong Zhang   PetscFunctionReturn(0);
43175af56d4SHong Zhang }
43214b396bbSKris Buschelman 
43314b396bbSKris Buschelman #undef __FUNCT__
434c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU"
435c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
436c29e884eSHong Zhang {
437c29e884eSHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
438c29e884eSHong Zhang   PetscErrorCode ierr;
439c29e884eSHong Zhang 
440c29e884eSHong Zhang   PetscFunctionBegin;
441c29e884eSHong Zhang   lu->options.Trans = TRANS;
4422205254eSKarl Rupp 
443c29e884eSHong Zhang   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
444c29e884eSHong Zhang   PetscFunctionReturn(0);
445c29e884eSHong Zhang }
446c29e884eSHong Zhang 
447c29e884eSHong Zhang #undef __FUNCT__
448c7c1fe80SHong Zhang #define __FUNCT__ "MatSolveTranspose_SuperLU"
449c7c1fe80SHong Zhang PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
450c7c1fe80SHong Zhang {
451c7c1fe80SHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
452c7c1fe80SHong Zhang   PetscErrorCode ierr;
453c7c1fe80SHong Zhang 
454c7c1fe80SHong Zhang   PetscFunctionBegin;
455c7c1fe80SHong Zhang   lu->options.Trans = NOTRANS;
4562205254eSKarl Rupp 
457c29e884eSHong Zhang   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
458c7c1fe80SHong Zhang   PetscFunctionReturn(0);
459c7c1fe80SHong Zhang }
460c7c1fe80SHong Zhang 
461e0b74bf9SHong Zhang #undef __FUNCT__
462e0b74bf9SHong Zhang #define __FUNCT__ "MatMatSolve_SuperLU"
463e0b74bf9SHong Zhang PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
464e0b74bf9SHong Zhang {
465e0b74bf9SHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
466cd723cd1SBarry Smith   PetscBool      flg;
467cd723cd1SBarry Smith   PetscErrorCode ierr;
468e0b74bf9SHong Zhang 
469e0b74bf9SHong Zhang   PetscFunctionBegin;
4700298fd71SBarry Smith   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
471ce94432eSBarry Smith   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
4720298fd71SBarry Smith   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
473ce94432eSBarry Smith   if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
4742205254eSKarl Rupp   lu->options.Trans = TRANS;
475e0b74bf9SHong Zhang   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
476e0b74bf9SHong Zhang   PetscFunctionReturn(0);
477e0b74bf9SHong Zhang }
478e0b74bf9SHong Zhang 
47914b396bbSKris Buschelman /*
48014b396bbSKris Buschelman    Note the r permutation is ignored
48114b396bbSKris Buschelman */
48214b396bbSKris Buschelman #undef __FUNCT__
483f0c56d0fSKris Buschelman #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
4840481f469SBarry Smith PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
48514b396bbSKris Buschelman {
4861d5ca7f3SHong Zhang   Mat_SuperLU *lu = (Mat_SuperLU*)(F->spptr);
487b24902e0SBarry Smith 
488b24902e0SBarry Smith   PetscFunctionBegin;
489b24902e0SBarry Smith   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
490b24902e0SBarry Smith   lu->CleanUpSuperLU      = PETSC_TRUE;
4911d5ca7f3SHong Zhang   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
492b24902e0SBarry Smith   PetscFunctionReturn(0);
493b24902e0SBarry Smith }
494b24902e0SBarry Smith 
49535bd34faSBarry Smith #undef __FUNCT__
4965ccb76cbSHong Zhang #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
497b2573a8aSBarry Smith static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
4985ccb76cbSHong Zhang {
4995ccb76cbSHong Zhang   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
5005ccb76cbSHong Zhang 
5015ccb76cbSHong Zhang   PetscFunctionBegin;
5025ccb76cbSHong Zhang   lu->options.ILU_DropTol = dtol;
5035ccb76cbSHong Zhang   PetscFunctionReturn(0);
5045ccb76cbSHong Zhang }
5055ccb76cbSHong Zhang 
5065ccb76cbSHong Zhang #undef __FUNCT__
5075ccb76cbSHong Zhang #define __FUNCT__ "MatSuperluSetILUDropTol"
5085ccb76cbSHong Zhang /*@
5095ccb76cbSHong Zhang   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
5105ccb76cbSHong Zhang    Logically Collective on Mat
5115ccb76cbSHong Zhang 
5125ccb76cbSHong Zhang    Input Parameters:
5135ccb76cbSHong Zhang +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
5145ccb76cbSHong Zhang -  dtol - drop tolerance
5155ccb76cbSHong Zhang 
5165ccb76cbSHong Zhang   Options Database:
5175ccb76cbSHong Zhang .   -mat_superlu_ilu_droptol <dtol>
5185ccb76cbSHong Zhang 
5195ccb76cbSHong Zhang    Level: beginner
5205ccb76cbSHong Zhang 
5215ccb76cbSHong Zhang    References: SuperLU Users' Guide
5225ccb76cbSHong Zhang 
5235ccb76cbSHong Zhang .seealso: MatGetFactor()
5245ccb76cbSHong Zhang @*/
5255ccb76cbSHong Zhang PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
5265ccb76cbSHong Zhang {
5275ccb76cbSHong Zhang   PetscErrorCode ierr;
5285ccb76cbSHong Zhang 
5295ccb76cbSHong Zhang   PetscFunctionBegin;
5305ccb76cbSHong Zhang   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
5315ccb76cbSHong Zhang   PetscValidLogicalCollectiveInt(F,dtol,2);
5325ccb76cbSHong Zhang   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
5335ccb76cbSHong Zhang   PetscFunctionReturn(0);
5345ccb76cbSHong Zhang }
5355ccb76cbSHong Zhang 
5365ccb76cbSHong Zhang #undef __FUNCT__
53735bd34faSBarry Smith #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
53835bd34faSBarry Smith PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
53935bd34faSBarry Smith {
54035bd34faSBarry Smith   PetscFunctionBegin;
5412692d6eeSBarry Smith   *type = MATSOLVERSUPERLU;
54235bd34faSBarry Smith   PetscFunctionReturn(0);
54335bd34faSBarry Smith }
54435bd34faSBarry Smith 
545b24902e0SBarry Smith 
546b24902e0SBarry Smith /*MC
547ba992d64SSatish Balay   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
548b24902e0SBarry Smith   via the external package SuperLU.
549b24902e0SBarry Smith 
550e2e64c6bSBarry Smith   Use ./configure --download-superlu to have PETSc installed with SuperLU
551b24902e0SBarry Smith 
552b24902e0SBarry Smith   Options Database Keys:
553e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE>            - Equil (None)
554e08999f5SMatthew G Knepley . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
555e08999f5SMatthew G Knepley . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
556e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
557e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
558e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
559e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
560e08999f5SMatthew G Knepley . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
561e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
562e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE>        - PrintStat (None)
563e08999f5SMatthew G Knepley . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
564e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
565e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
566e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
567e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
568e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
569e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
570b24902e0SBarry Smith 
5712692d6eeSBarry Smith    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
5725c9eb25fSBarry Smith 
573b24902e0SBarry Smith    Level: beginner
574b24902e0SBarry Smith 
575d45987f3SHong Zhang .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
576b24902e0SBarry Smith M*/
577b24902e0SBarry Smith 
578b24902e0SBarry Smith #undef __FUNCT__
579b24902e0SBarry Smith #define __FUNCT__ "MatGetFactor_seqaij_superlu"
5808cc058d9SJed Brown PETSC_EXTERN PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
581b24902e0SBarry Smith {
58214b396bbSKris Buschelman   Mat            B;
583f0c56d0fSKris Buschelman   Mat_SuperLU    *lu;
5846849ba73SBarry Smith   PetscErrorCode ierr;
5855d8b2efaSHong Zhang   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
5868afaa268SBarry Smith   PetscBool      flg,set;
5873cb270beSHong Zhang   PetscReal      real_input;
5885ca28756SHong Zhang   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
5895ca28756SHong Zhang   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
5905ca28756SHong Zhang   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
59114b396bbSKris Buschelman 
59214b396bbSKris Buschelman   PetscFunctionBegin;
593ce94432eSBarry Smith   ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr);
594d0f46423SBarry Smith   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
5957adad957SLisandro Dalcin   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
5960298fd71SBarry Smith   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
597f0c56d0fSKris Buschelman 
598cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
599b24902e0SBarry Smith     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
600cffbb591SHong Zhang     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
601b3a44c85SBarry Smith   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
602cffbb591SHong Zhang 
603b24902e0SBarry Smith   B->ops->destroy     = MatDestroy_SuperLU;
6043519fcdcSHong Zhang   B->ops->view        = MatView_SuperLU;
60520be8e61SHong Zhang   B->ops->getdiagonal = MatGetDiagonal_SuperLU;
606d5f3da31SBarry Smith   B->factortype       = ftype;
60794b7f48cSBarry Smith   B->assembled        = PETSC_TRUE;           /* required by -ksp_view */
6085c9eb25fSBarry Smith   B->preallocated     = PETSC_TRUE;
60914b396bbSKris Buschelman 
610b00a9115SJed Brown   ierr = PetscNewLog(B,&lu);CHKERRQ(ierr);
611cae5a23dSHong Zhang 
612cffbb591SHong Zhang   if (ftype == MAT_FACTOR_LU) {
6139ce50919SHong Zhang     set_default_options(&lu->options);
6143d6581fbSHong Zhang     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
6153d6581fbSHong Zhang       "Whether or not the system will be equilibrated depends on the
6163d6581fbSHong Zhang        scaling of the matrix A, but if equilibration is used, A is
6173d6581fbSHong Zhang        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
6183d6581fbSHong Zhang        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
6193d6581fbSHong Zhang      We set 'options.Equil = NO' as default because additional space is needed for it.
6203d6581fbSHong Zhang     */
6213d6581fbSHong Zhang     lu->options.Equil = NO;
622cffbb591SHong Zhang   } else if (ftype == MAT_FACTOR_ILU) {
6230c74a584SJed Brown     /* Set the default input options of ilu: */
624d387c056SBarry Smith     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
625cffbb591SHong Zhang   }
6269ce50919SHong Zhang   lu->options.PrintStat = NO;
6271a2e2f44SHong Zhang 
6285d8b2efaSHong Zhang   /* Initialize the statistics variables. */
629d387c056SBarry Smith   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
6308914a3f7SHong Zhang   lu->lwork = 0;   /* allocate space internally by system malloc */
6319ce50919SHong Zhang 
632ce94432eSBarry Smith   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
6338afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,NULL);CHKERRQ(ierr);
6348914a3f7SHong Zhang   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
6352205254eSKarl Rupp   if (flg) lu->options.ColPerm = (colperm_t)indx;
6368914a3f7SHong Zhang   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
6372205254eSKarl Rupp   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
6388afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,&set);CHKERRQ(ierr);
6398afaa268SBarry Smith   if (set && flg) lu->options.SymmetricMode = YES;
6403cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
6413cb270beSHong Zhang   if (flg) lu->options.DiagPivotThresh = (double) real_input;
6428afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,&set);CHKERRQ(ierr);
6438afaa268SBarry Smith   if (set && flg) lu->options.PivotGrowth = YES;
6448afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,&set);CHKERRQ(ierr);
6458afaa268SBarry Smith   if (set && flg) lu->options.ConditionNumber = YES;
646d7ebd59bSHong Zhang   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
6472205254eSKarl Rupp   if (flg) lu->options.RowPerm = (rowperm_t)indx;
6488afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,&set);CHKERRQ(ierr);
6498afaa268SBarry Smith   if (set && flg) lu->options.ReplaceTinyPivot = YES;
6508afaa268SBarry Smith   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,&set);CHKERRQ(ierr);
6518afaa268SBarry Smith   if (set && flg) lu->options.PrintStat = YES;
6520298fd71SBarry Smith   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
6535fe6150dSHong Zhang   if (lu->lwork > 0) {
654d87de817SBarry Smith     /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/
6555fe6150dSHong Zhang     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
6565fe6150dSHong Zhang   } else if (lu->lwork != 0 && lu->lwork != -1) {
65777431f27SBarry Smith     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
6588914a3f7SHong Zhang     lu->lwork = 0;
6598914a3f7SHong Zhang   }
660cffbb591SHong Zhang   /* ilu options */
6613cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
6623cb270beSHong Zhang   if (flg) lu->options.ILU_DropTol = (double) real_input;
6633cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
6643cb270beSHong Zhang   if (flg) lu->options.ILU_FillTol = (double) real_input;
6653cb270beSHong Zhang   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
6663cb270beSHong Zhang   if (flg) lu->options.ILU_FillFactor = (double) real_input;
6670298fd71SBarry Smith   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
668cffbb591SHong Zhang   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
6692205254eSKarl Rupp   if (flg) lu->options.ILU_Norm = (norm_t)indx;
670cffbb591SHong Zhang   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
6712205254eSKarl Rupp   if (flg) lu->options.ILU_MILU = (milu_t)indx;
6729ce50919SHong Zhang   PetscOptionsEnd();
67338abfdaeSHong Zhang   if (lu->options.Equil == YES) {
67438abfdaeSHong Zhang     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
67538abfdaeSHong Zhang     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
67638abfdaeSHong Zhang   }
6779ce50919SHong Zhang 
6785d8b2efaSHong Zhang   /* Allocate spaces (notice sizes are for the transpose) */
679785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->etree);CHKERRQ(ierr);
680785e854fSJed Brown   ierr = PetscMalloc1(n,&lu->perm_r);CHKERRQ(ierr);
681785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->perm_c);CHKERRQ(ierr);
682785e854fSJed Brown   ierr = PetscMalloc1(n,&lu->R);CHKERRQ(ierr);
683785e854fSJed Brown   ierr = PetscMalloc1(m,&lu->C);CHKERRQ(ierr);
6845d8b2efaSHong Zhang 
6855d8b2efaSHong Zhang   /* create rhs and solution x without allocate space for .Store */
6865d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX)
6873cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
688d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
689d387c056SBarry Smith   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
6903cb270beSHong Zhang #else
691d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
692d387c056SBarry Smith   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
6933cb270beSHong Zhang #endif
6943cb270beSHong Zhang #else
6953cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE)
696d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
697d387c056SBarry Smith   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
6985d8b2efaSHong Zhang #else
699d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
700d387c056SBarry Smith   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
7015d8b2efaSHong Zhang #endif
7023cb270beSHong Zhang #endif
7035d8b2efaSHong Zhang 
704bdf89e91SBarry Smith   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
705bdf89e91SBarry Smith   ierr     = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
7065c9eb25fSBarry Smith   B->spptr = lu;
70720be8e61SHong Zhang 
708899d7b4fSKris Buschelman   *F       = B;
70914b396bbSKris Buschelman   PetscFunctionReturn(0);
71014b396bbSKris Buschelman }
711d954db57SHong Zhang 
71242c9c57cSBarry Smith #undef __FUNCT__
71342c9c57cSBarry Smith #define __FUNCT__ "MatSolverPackageRegister_SuperLU"
71429b38603SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_SuperLU(void)
71542c9c57cSBarry Smith {
71642c9c57cSBarry Smith   PetscErrorCode ierr;
71742c9c57cSBarry Smith 
71842c9c57cSBarry Smith   PetscFunctionBegin;
71942c9c57cSBarry Smith   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_LU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
72042c9c57cSBarry Smith   ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ,       MAT_FACTOR_ILU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr);
72142c9c57cSBarry Smith   PetscFunctionReturn(0);
72242c9c57cSBarry Smith }
723