114b396bbSKris Buschelman 25a46d3faSBarry Smith /* -------------------------------------------------------------------- 35a46d3faSBarry Smith 45a46d3faSBarry Smith This file implements a subclass of the SeqAIJ matrix class that uses 5c2b89b5dSBarry Smith the SuperLU sparse solver. 614b396bbSKris Buschelman */ 714b396bbSKris Buschelman 85a46d3faSBarry Smith /* 95a46d3faSBarry Smith Defines the data structure for the base matrix type (SeqAIJ) 105a46d3faSBarry Smith */ 11c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 1214b396bbSKris Buschelman 135a46d3faSBarry Smith /* 145a46d3faSBarry Smith SuperLU include files 155a46d3faSBarry Smith */ 1614b396bbSKris Buschelman EXTERN_C_BEGIN 1714b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX) 183cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 193cb270beSHong Zhang #include <slu_cdefs.h> 203cb270beSHong Zhang #else 21c6db04a5SJed Brown #include <slu_zdefs.h> 223cb270beSHong Zhang #endif 233cb270beSHong Zhang #else 243cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 253cb270beSHong Zhang #include <slu_sdefs.h> 2614b396bbSKris Buschelman #else 27c6db04a5SJed Brown #include <slu_ddefs.h> 2814b396bbSKris Buschelman #endif 293cb270beSHong Zhang #endif 30c6db04a5SJed Brown #include <slu_util.h> 3114b396bbSKris Buschelman EXTERN_C_END 3214b396bbSKris Buschelman 335a46d3faSBarry Smith /* 34245fece9SBarry Smith This is the data that defines the SuperLU factored matrix type 355a46d3faSBarry Smith */ 3614b396bbSKris Buschelman typedef struct { 3775af56d4SHong Zhang SuperMatrix A,L,U,B,X; 3875af56d4SHong Zhang superlu_options_t options; 39da7a1d00SHong Zhang PetscInt *perm_c; /* column permutation vector */ 40da7a1d00SHong Zhang PetscInt *perm_r; /* row permutations from partial pivoting */ 41da7a1d00SHong Zhang PetscInt *etree; 42da7a1d00SHong Zhang PetscReal *R, *C; 4375af56d4SHong Zhang char equed[1]; 44da7a1d00SHong Zhang PetscInt lwork; 4575af56d4SHong Zhang void *work; 46da7a1d00SHong Zhang PetscReal rpg, rcond; 4775af56d4SHong Zhang mem_usage_t mem_usage; 4875af56d4SHong Zhang MatStructure flg; 495d8b2efaSHong Zhang SuperLUStat_t stat; 50cae5a23dSHong Zhang Mat A_dup; 51cae5a23dSHong Zhang PetscScalar *rhs_dup; 52c147c726SHong Zhang GlobalLU_t Glu; 5314b396bbSKris Buschelman 5414b396bbSKris Buschelman /* Flag to clean up (non-global) SuperLU objects during Destroy */ 55ace3abfcSBarry Smith PetscBool CleanUpSuperLU; 56f0c56d0fSKris Buschelman } Mat_SuperLU; 5714b396bbSKris Buschelman 585a46d3faSBarry Smith /* 595a46d3faSBarry Smith Utility function 605a46d3faSBarry Smith */ 61*860c79edSBarry Smith static PetscErrorCode MatView_Info_SuperLU(Mat A,PetscViewer viewer) 625a46d3faSBarry Smith { 63245fece9SBarry Smith Mat_SuperLU *lu= (Mat_SuperLU*)A->data; 645a46d3faSBarry Smith PetscErrorCode ierr; 655a46d3faSBarry Smith superlu_options_t options; 665a46d3faSBarry Smith 675a46d3faSBarry Smith PetscFunctionBegin; 685a46d3faSBarry Smith options = lu->options; 692205254eSKarl Rupp 705a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr); 715a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr); 725a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr); 735a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr); 745a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr); 755a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr); 765a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr); 775a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr); 785a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr); 795a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr); 805a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr); 815a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," lwork: %D\n",lu->lwork);CHKERRQ(ierr); 82d5f3da31SBarry Smith if (A->factortype == MAT_FACTOR_ILU) { 83cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr); 84cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr); 85cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr); 86cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr); 87cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr); 88cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr); 89cffbb591SHong Zhang } 905a46d3faSBarry Smith PetscFunctionReturn(0); 915a46d3faSBarry Smith } 925a46d3faSBarry Smith 93245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x) 94245fece9SBarry Smith { 95245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)A->data; 96245fece9SBarry Smith const PetscScalar *barray; 97245fece9SBarry Smith PetscScalar *xarray; 98245fece9SBarry Smith PetscErrorCode ierr; 99245fece9SBarry Smith PetscInt info,i,n; 100245fece9SBarry Smith PetscReal ferr,berr; 101245fece9SBarry Smith static PetscBool cite = PETSC_FALSE; 102245fece9SBarry Smith 103245fece9SBarry Smith PetscFunctionBegin; 104245fece9SBarry Smith if (lu->lwork == -1) PetscFunctionReturn(0); 105245fece9SBarry Smith ierr = PetscCitationsRegister("@article{superlu99,\n author = {James W. Demmel and Stanley C. Eisenstat and\n John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n title = {A supernodal approach to sparse partial pivoting},\n journal = {SIAM J. Matrix Analysis and Applications},\n year = {1999},\n volume = {20},\n number = {3},\n pages = {720-755}\n}\n",&cite);CHKERRQ(ierr); 106245fece9SBarry Smith 107245fece9SBarry Smith ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr); 108245fece9SBarry Smith lu->B.ncol = 1; /* Set the number of right-hand side */ 109245fece9SBarry Smith if (lu->options.Equil && !lu->rhs_dup) { 110245fece9SBarry Smith /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */ 111245fece9SBarry Smith ierr = PetscMalloc1(n,&lu->rhs_dup);CHKERRQ(ierr); 112245fece9SBarry Smith } 113245fece9SBarry Smith if (lu->options.Equil) { 114245fece9SBarry Smith /* Copy b into rsh_dup */ 115245fece9SBarry Smith ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 116245fece9SBarry Smith ierr = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr); 117245fece9SBarry Smith ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 118245fece9SBarry Smith barray = lu->rhs_dup; 119245fece9SBarry Smith } else { 120245fece9SBarry Smith ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 121245fece9SBarry Smith } 122245fece9SBarry Smith ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 123245fece9SBarry Smith 124245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX) 125245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 126245fece9SBarry Smith ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray; 127245fece9SBarry Smith ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray; 128245fece9SBarry Smith #else 129245fece9SBarry Smith ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray; 130245fece9SBarry Smith ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray; 131245fece9SBarry Smith #endif 132245fece9SBarry Smith #else 133245fece9SBarry Smith ((DNformat*)lu->B.Store)->nzval = (void*)barray; 134245fece9SBarry Smith ((DNformat*)lu->X.Store)->nzval = xarray; 135245fece9SBarry Smith #endif 136245fece9SBarry Smith 137245fece9SBarry Smith lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ 138245fece9SBarry Smith if (A->factortype == MAT_FACTOR_LU) { 139245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX) 140245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 141245fece9SBarry Smith PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 142245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 143245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 144245fece9SBarry Smith #else 145245fece9SBarry Smith PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 146245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 147245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 148245fece9SBarry Smith #endif 149245fece9SBarry Smith #else 150245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 151245fece9SBarry Smith PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 152245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 153245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 154245fece9SBarry Smith #else 155245fece9SBarry Smith PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 156245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 157245fece9SBarry Smith &lu->Glu,&lu->mem_usage, &lu->stat, &info)); 158245fece9SBarry Smith #endif 159245fece9SBarry Smith #endif 160245fece9SBarry Smith } else if (A->factortype == MAT_FACTOR_ILU) { 161245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX) 162245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 163245fece9SBarry Smith PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 164245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 165245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 166245fece9SBarry Smith #else 167245fece9SBarry Smith PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 168245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 169245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 170245fece9SBarry Smith #endif 171245fece9SBarry Smith #else 172245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 173245fece9SBarry Smith PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 174245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 175245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 176245fece9SBarry Smith #else 177245fece9SBarry Smith PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 178245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 179245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 180245fece9SBarry Smith #endif 181245fece9SBarry Smith #endif 182245fece9SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 183245fece9SBarry Smith if (!lu->options.Equil) { 184245fece9SBarry Smith ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 185245fece9SBarry Smith } 186245fece9SBarry Smith ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 187245fece9SBarry Smith 188245fece9SBarry Smith if (!info || info == lu->A.ncol+1) { 189245fece9SBarry Smith if (lu->options.IterRefine) { 190245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n"); 191245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); 192245fece9SBarry Smith for (i = 0; i < 1; ++i) { 193245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr); 194245fece9SBarry Smith } 195245fece9SBarry Smith } 196245fece9SBarry Smith } else if (info > 0) { 197245fece9SBarry Smith if (lu->lwork == -1) { 198245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," ** Estimated memory: %D bytes\n", info - lu->A.ncol); 199245fece9SBarry Smith } else { 200245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," Warning: gssvx() returns info %D\n",info); 201245fece9SBarry Smith } 202245fece9SBarry Smith } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info); 203245fece9SBarry Smith 204245fece9SBarry Smith if (lu->options.PrintStat) { 205245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n"); 206245fece9SBarry Smith PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat)); 207245fece9SBarry Smith } 208245fece9SBarry Smith PetscFunctionReturn(0); 209245fece9SBarry Smith } 210245fece9SBarry Smith 211245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x) 212245fece9SBarry Smith { 213245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)A->data; 214245fece9SBarry Smith PetscErrorCode ierr; 215245fece9SBarry Smith 216245fece9SBarry Smith PetscFunctionBegin; 217603e8f96SBarry Smith if (A->factorerrortype) { 218245fece9SBarry Smith ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr); 219245fece9SBarry Smith ierr = VecSetInf(x);CHKERRQ(ierr); 220245fece9SBarry Smith PetscFunctionReturn(0); 221245fece9SBarry Smith } 222245fece9SBarry Smith 223245fece9SBarry Smith lu->options.Trans = TRANS; 224245fece9SBarry Smith ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr); 225245fece9SBarry Smith PetscFunctionReturn(0); 226245fece9SBarry Smith } 227245fece9SBarry Smith 228245fece9SBarry Smith PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x) 229245fece9SBarry Smith { 230245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)A->data; 231245fece9SBarry Smith PetscErrorCode ierr; 232245fece9SBarry Smith 233245fece9SBarry Smith PetscFunctionBegin; 234603e8f96SBarry Smith if (A->factorerrortype) { 235245fece9SBarry Smith ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr); 236245fece9SBarry Smith ierr = VecSetInf(x);CHKERRQ(ierr); 237245fece9SBarry Smith PetscFunctionReturn(0); 238245fece9SBarry Smith } 239245fece9SBarry Smith 240245fece9SBarry Smith lu->options.Trans = NOTRANS; 241245fece9SBarry Smith ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr); 242245fece9SBarry Smith PetscFunctionReturn(0); 243245fece9SBarry Smith } 244245fece9SBarry Smith 245245fece9SBarry Smith static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info) 2465a46d3faSBarry Smith { 247245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)F->data; 248cae5a23dSHong Zhang Mat_SeqAIJ *aa; 2495a46d3faSBarry Smith PetscErrorCode ierr; 2505a46d3faSBarry Smith PetscInt sinfo; 2515a46d3faSBarry Smith PetscReal ferr, berr; 2525a46d3faSBarry Smith NCformat *Ustore; 2535a46d3faSBarry Smith SCformat *Lstore; 2545a46d3faSBarry Smith 2555a46d3faSBarry Smith PetscFunctionBegin; 2565a46d3faSBarry Smith if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */ 2575a46d3faSBarry Smith lu->options.Fact = SamePattern; 2585a46d3faSBarry Smith /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */ 2595a46d3faSBarry Smith Destroy_SuperMatrix_Store(&lu->A); 260cae5a23dSHong Zhang if (lu->options.Equil) { 261cae5a23dSHong Zhang ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 262cae5a23dSHong Zhang } 2635a46d3faSBarry Smith if (lu->lwork >= 0) { 264d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L)); 265d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U)); 2665a46d3faSBarry Smith lu->options.Fact = SamePattern; 2675a46d3faSBarry Smith } 2685a46d3faSBarry Smith } 2695a46d3faSBarry Smith 2705a46d3faSBarry Smith /* Create the SuperMatrix for lu->A=A^T: 2715a46d3faSBarry Smith Since SuperLU likes column-oriented matrices,we pass it the transpose, 2725a46d3faSBarry Smith and then solve A^T X = B in MatSolve(). */ 273cae5a23dSHong Zhang if (lu->options.Equil) { 274cae5a23dSHong Zhang aa = (Mat_SeqAIJ*)(lu->A_dup)->data; 275cae5a23dSHong Zhang } else { 276cae5a23dSHong Zhang aa = (Mat_SeqAIJ*)(A)->data; 277cae5a23dSHong Zhang } 2785a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX) 2793cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 280d387c056SBarry Smith PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE)); 2813cb270beSHong Zhang #else 282d387c056SBarry Smith PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE)); 2833cb270beSHong Zhang #endif 2843cb270beSHong Zhang #else 2853cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 286d387c056SBarry Smith PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE)); 2875a46d3faSBarry Smith #else 288d387c056SBarry Smith PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE)); 2895a46d3faSBarry Smith #endif 2903cb270beSHong Zhang #endif 2915a46d3faSBarry Smith 2925a46d3faSBarry Smith /* Numerical factorization */ 2935a46d3faSBarry Smith lu->B.ncol = 0; /* Indicate not to solve the system */ 294d5f3da31SBarry Smith if (F->factortype == MAT_FACTOR_LU) { 2955a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX) 2963cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 297d387c056SBarry Smith PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 2983cb270beSHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 2995db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3003cb270beSHong Zhang #else 301d387c056SBarry Smith PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 3025a46d3faSBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 3035db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3043cb270beSHong Zhang #endif 3053cb270beSHong Zhang #else 3063cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 307d387c056SBarry Smith PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 3083cb270beSHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 3095db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3105a46d3faSBarry Smith #else 311d387c056SBarry Smith PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 3125a46d3faSBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 313c147c726SHong Zhang &lu->Glu,&lu->mem_usage, &lu->stat, &sinfo)); 3145a46d3faSBarry Smith #endif 3153cb270beSHong Zhang #endif 316d5f3da31SBarry Smith } else if (F->factortype == MAT_FACTOR_ILU) { 317cffbb591SHong Zhang /* Compute the incomplete factorization, condition number and pivot growth */ 318cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX) 3193cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 320d387c056SBarry Smith PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C, 3213cb270beSHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 3225db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3233cb270beSHong Zhang #else 324d387c056SBarry Smith PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C, 325cffbb591SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 3265db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3273cb270beSHong Zhang #endif 3283cb270beSHong Zhang #else 3293cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 330d387c056SBarry Smith PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 3313cb270beSHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 3325db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 333cffbb591SHong Zhang #else 334d387c056SBarry Smith PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 335cffbb591SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 336c147c726SHong Zhang &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 337cffbb591SHong Zhang #endif 3383cb270beSHong Zhang #endif 339f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 3405a46d3faSBarry Smith if (!sinfo || sinfo == lu->A.ncol+1) { 3412205254eSKarl Rupp if (lu->options.PivotGrowth) { 342675d1226SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," Recip. pivot growth = %e\n", lu->rpg);CHKERRQ(ierr); 3432205254eSKarl Rupp } 3442205254eSKarl Rupp if (lu->options.ConditionNumber) { 345675d1226SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," Recip. condition number = %e\n", lu->rcond);CHKERRQ(ierr); 3462205254eSKarl Rupp } 3475a46d3faSBarry Smith } else if (sinfo > 0) { 348675d1226SHong Zhang if (A->erroriffailure) { 349675d1226SHong Zhang SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo); 350675d1226SHong Zhang } else { 351675d1226SHong Zhang if (sinfo <= lu->A.ncol) { 352675d1226SHong Zhang if (lu->options.ILU_FillTol == 0.0) { 353603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 354675d1226SHong Zhang } 355675d1226SHong Zhang ierr = PetscInfo2(F,"Number of zero pivots %D, ILU_FillTol %g\n",sinfo,lu->options.ILU_FillTol);CHKERRQ(ierr); 356675d1226SHong Zhang } else if (sinfo == lu->A.ncol + 1) { 357675d1226SHong Zhang /* 358675d1226SHong Zhang U is nonsingular, but RCOND is less than machine 359675d1226SHong Zhang precision, meaning that the matrix is singular to 360675d1226SHong Zhang working precision. Nevertheless, the solution and 361675d1226SHong Zhang error bounds are computed because there are a number 362675d1226SHong Zhang of situations where the computed solution can be more 363675d1226SHong Zhang accurate than the value of RCOND would suggest. 364675d1226SHong Zhang */ 365675d1226SHong Zhang ierr = PetscInfo1(F,"Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %D",sinfo);CHKERRQ(ierr); 366675d1226SHong Zhang } else { /* sinfo > lu->A.ncol + 1 */ 367603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 368675d1226SHong Zhang ierr = PetscInfo1(F,"Number of bytes allocated when memory allocation fails %D\n",sinfo);CHKERRQ(ierr); 369675d1226SHong Zhang } 370675d1226SHong Zhang } 371f23aa3ddSBarry Smith } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo); 3725a46d3faSBarry Smith 3735a46d3faSBarry Smith if (lu->options.PrintStat) { 374675d1226SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");CHKERRQ(ierr); 375d387c056SBarry Smith PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat)); 3765a46d3faSBarry Smith Lstore = (SCformat*) lu->L.Store; 3775a46d3faSBarry Smith Ustore = (NCformat*) lu->U.Store; 3785a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor L = %D\n", Lstore->nnz); 3795a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor U = %D\n", Ustore->nnz); 3805a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol); 3816da386baSHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," L\\U MB %.3f\ttotal MB needed %.3f\n", 3826da386baSHong Zhang lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6); 3835a46d3faSBarry Smith } 3845a46d3faSBarry Smith 3855a46d3faSBarry Smith lu->flg = SAME_NONZERO_PATTERN; 3861d5ca7f3SHong Zhang F->ops->solve = MatSolve_SuperLU; 3871d5ca7f3SHong Zhang F->ops->solvetranspose = MatSolveTranspose_SuperLU; 3881aef8b4cSStefano Zampini F->ops->matsolve = NULL; 3895a46d3faSBarry Smith PetscFunctionReturn(0); 3905a46d3faSBarry Smith } 3915a46d3faSBarry Smith 392245fece9SBarry Smith static PetscErrorCode MatDestroy_SuperLU(Mat A) 39314b396bbSKris Buschelman { 394dfbe8321SBarry Smith PetscErrorCode ierr; 395245fece9SBarry Smith Mat_SuperLU *lu=(Mat_SuperLU*)A->data; 39614b396bbSKris Buschelman 39714b396bbSKris Buschelman PetscFunctionBegin; 398245fece9SBarry Smith if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */ 399d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A)); 400d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B)); 401d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X)); 402d387c056SBarry Smith PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat)); 4030e742b69SHong Zhang if (lu->lwork >= 0) { 404d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L)); 405d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U)); 4060e742b69SHong Zhang } 4070e742b69SHong Zhang } 4089ce50919SHong Zhang ierr = PetscFree(lu->etree);CHKERRQ(ierr); 4099ce50919SHong Zhang ierr = PetscFree(lu->perm_r);CHKERRQ(ierr); 4109ce50919SHong Zhang ierr = PetscFree(lu->perm_c);CHKERRQ(ierr); 4119ce50919SHong Zhang ierr = PetscFree(lu->R);CHKERRQ(ierr); 4129ce50919SHong Zhang ierr = PetscFree(lu->C);CHKERRQ(ierr); 413bf0cc555SLisandro Dalcin ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr); 414bf0cc555SLisandro Dalcin ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr); 415245fece9SBarry Smith ierr = PetscFree(A->data);CHKERRQ(ierr); 4160e742b69SHong Zhang 417d954db57SHong Zhang /* clear composed functions */ 418bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverPackage_C",NULL);CHKERRQ(ierr); 419bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr); 42014b396bbSKris Buschelman PetscFunctionReturn(0); 42114b396bbSKris Buschelman } 42214b396bbSKris Buschelman 423245fece9SBarry Smith static PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer) 42414b396bbSKris Buschelman { 425dfbe8321SBarry Smith PetscErrorCode ierr; 426ace3abfcSBarry Smith PetscBool iascii; 42714b396bbSKris Buschelman PetscViewerFormat format; 42814b396bbSKris Buschelman 42914b396bbSKris Buschelman PetscFunctionBegin; 430251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 43132077d6dSBarry Smith if (iascii) { 43214b396bbSKris Buschelman ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 4332f59403fSHong Zhang if (format == PETSC_VIEWER_ASCII_INFO) { 434*860c79edSBarry Smith ierr = MatView_Info_SuperLU(A,viewer);CHKERRQ(ierr); 43514b396bbSKris Buschelman } 43614b396bbSKris Buschelman } 43714b396bbSKris Buschelman PetscFunctionReturn(0); 43814b396bbSKris Buschelman } 43914b396bbSKris Buschelman 440e0b74bf9SHong Zhang PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X) 441e0b74bf9SHong Zhang { 442245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)A->data; 443cd723cd1SBarry Smith PetscBool flg; 444cd723cd1SBarry Smith PetscErrorCode ierr; 445e0b74bf9SHong Zhang 446e0b74bf9SHong Zhang PetscFunctionBegin; 4470298fd71SBarry Smith ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 448ce94432eSBarry Smith if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 4490298fd71SBarry Smith ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 450ce94432eSBarry Smith if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 4512205254eSKarl Rupp lu->options.Trans = TRANS; 452e0b74bf9SHong Zhang SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet"); 453e0b74bf9SHong Zhang PetscFunctionReturn(0); 454e0b74bf9SHong Zhang } 455e0b74bf9SHong Zhang 45614b396bbSKris Buschelman /* 45714b396bbSKris Buschelman Note the r permutation is ignored 45814b396bbSKris Buschelman */ 459245fece9SBarry Smith static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 46014b396bbSKris Buschelman { 461245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)(F->data); 462b24902e0SBarry Smith 463b24902e0SBarry Smith PetscFunctionBegin; 464b24902e0SBarry Smith lu->flg = DIFFERENT_NONZERO_PATTERN; 465b24902e0SBarry Smith lu->CleanUpSuperLU = PETSC_TRUE; 4661d5ca7f3SHong Zhang F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU; 467b24902e0SBarry Smith PetscFunctionReturn(0); 468b24902e0SBarry Smith } 469b24902e0SBarry Smith 470b2573a8aSBarry Smith static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol) 4715ccb76cbSHong Zhang { 472245fece9SBarry Smith Mat_SuperLU *lu= (Mat_SuperLU*)F->data; 4735ccb76cbSHong Zhang 4745ccb76cbSHong Zhang PetscFunctionBegin; 4755ccb76cbSHong Zhang lu->options.ILU_DropTol = dtol; 4765ccb76cbSHong Zhang PetscFunctionReturn(0); 4775ccb76cbSHong Zhang } 4785ccb76cbSHong Zhang 4795ccb76cbSHong Zhang /*@ 4805ccb76cbSHong Zhang MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance 4815ccb76cbSHong Zhang Logically Collective on Mat 4825ccb76cbSHong Zhang 4835ccb76cbSHong Zhang Input Parameters: 4845ccb76cbSHong Zhang + F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface 4855ccb76cbSHong Zhang - dtol - drop tolerance 4865ccb76cbSHong Zhang 4875ccb76cbSHong Zhang Options Database: 4885ccb76cbSHong Zhang . -mat_superlu_ilu_droptol <dtol> 4895ccb76cbSHong Zhang 4905ccb76cbSHong Zhang Level: beginner 4915ccb76cbSHong Zhang 49296a0c994SBarry Smith References: 49396a0c994SBarry Smith . SuperLU Users' Guide 4945ccb76cbSHong Zhang 4955ccb76cbSHong Zhang .seealso: MatGetFactor() 4965ccb76cbSHong Zhang @*/ 4975ccb76cbSHong Zhang PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol) 4985ccb76cbSHong Zhang { 4995ccb76cbSHong Zhang PetscErrorCode ierr; 5005ccb76cbSHong Zhang 5015ccb76cbSHong Zhang PetscFunctionBegin; 5025ccb76cbSHong Zhang PetscValidHeaderSpecific(F,MAT_CLASSID,1); 50369fbec6eSBarry Smith PetscValidLogicalCollectiveReal(F,dtol,2); 5045ccb76cbSHong Zhang ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr); 5055ccb76cbSHong Zhang PetscFunctionReturn(0); 5065ccb76cbSHong Zhang } 5075ccb76cbSHong Zhang 50835bd34faSBarry Smith PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type) 50935bd34faSBarry Smith { 51035bd34faSBarry Smith PetscFunctionBegin; 5112692d6eeSBarry Smith *type = MATSOLVERSUPERLU; 51235bd34faSBarry Smith PetscFunctionReturn(0); 51335bd34faSBarry Smith } 51435bd34faSBarry Smith 515b24902e0SBarry Smith /*MC 516ba992d64SSatish Balay MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices 517b24902e0SBarry Smith via the external package SuperLU. 518b24902e0SBarry Smith 519e2e64c6bSBarry Smith Use ./configure --download-superlu to have PETSc installed with SuperLU 520b24902e0SBarry Smith 521de5a416aSHong Zhang Use -pc_type lu -pc_factor_mat_solver_package superlu to use this direct solver 522c2b89b5dSBarry Smith 523b24902e0SBarry Smith Options Database Keys: 524e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE> - Equil (None) 525e08999f5SMatthew G Knepley . -mat_superlu_colperm <COLAMD> - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD 526e08999f5SMatthew G Knepley . -mat_superlu_iterrefine <NOREFINE> - (choose one of) NOREFINE SINGLE DOUBLE EXTRA 527e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE> - SymmetricMode (None) 528e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1> - DiagPivotThresh (None) 529e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE> - PivotGrowth (None) 530e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE> - ConditionNumber (None) 531e08999f5SMatthew G Knepley . -mat_superlu_rowperm <NOROWPERM> - (choose one of) NOROWPERM LargeDiag 532e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None) 533e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE> - PrintStat (None) 534e08999f5SMatthew G Knepley . -mat_superlu_lwork <0> - size of work array in bytes used by factorization (None) 535e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0> - ILU_DropTol (None) 536e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0> - ILU_FillTol (None) 537e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0> - ILU_FillFactor (None) 538e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0> - ILU_DropRule (None) 539e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0> - ILU_Norm (None) 540e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0> - ILU_MILU (None) 541b24902e0SBarry Smith 5422692d6eeSBarry Smith Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves 5435c9eb25fSBarry Smith 544b24902e0SBarry Smith Level: beginner 545b24902e0SBarry Smith 546d45987f3SHong Zhang .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage 547b24902e0SBarry Smith M*/ 548b24902e0SBarry Smith 549db87b0f2SBarry Smith static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F) 550b24902e0SBarry Smith { 55114b396bbSKris Buschelman Mat B; 552f0c56d0fSKris Buschelman Mat_SuperLU *lu; 5536849ba73SBarry Smith PetscErrorCode ierr; 5545d8b2efaSHong Zhang PetscInt indx,m=A->rmap->n,n=A->cmap->n; 5558afaa268SBarry Smith PetscBool flg,set; 5563cb270beSHong Zhang PetscReal real_input; 5575ca28756SHong Zhang const char *colperm[] ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */ 5585ca28756SHong Zhang const char *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"}; 5595ca28756SHong Zhang const char *rowperm[] ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */ 56014b396bbSKris Buschelman 56114b396bbSKris Buschelman PetscFunctionBegin; 562ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 563d0f46423SBarry Smith ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 564245fece9SBarry Smith ierr = PetscStrallocpy("superlu",&((PetscObject)B)->type_name);CHKERRQ(ierr); 565245fece9SBarry Smith ierr = MatSetUp(B);CHKERRQ(ierr); 566cffbb591SHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 567b24902e0SBarry Smith B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU; 568cffbb591SHong Zhang B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU; 569b3a44c85SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 570cffbb591SHong Zhang 57100c67f3bSHong Zhang ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 57200c67f3bSHong Zhang ierr = PetscStrallocpy(MATSOLVERSUPERLU,&B->solvertype);CHKERRQ(ierr); 57300c67f3bSHong Zhang 574b9c12af5SBarry Smith B->ops->getinfo = MatGetInfo_External; 575b24902e0SBarry Smith B->ops->destroy = MatDestroy_SuperLU; 5763519fcdcSHong Zhang B->ops->view = MatView_SuperLU; 577d5f3da31SBarry Smith B->factortype = ftype; 57894b7f48cSBarry Smith B->assembled = PETSC_TRUE; /* required by -ksp_view */ 5795c9eb25fSBarry Smith B->preallocated = PETSC_TRUE; 58014b396bbSKris Buschelman 581b00a9115SJed Brown ierr = PetscNewLog(B,&lu);CHKERRQ(ierr); 582cae5a23dSHong Zhang 583cffbb591SHong Zhang if (ftype == MAT_FACTOR_LU) { 5849ce50919SHong Zhang set_default_options(&lu->options); 5853d6581fbSHong Zhang /* Comments from SuperLU_4.0/SRC/dgssvx.c: 5863d6581fbSHong Zhang "Whether or not the system will be equilibrated depends on the 5873d6581fbSHong Zhang scaling of the matrix A, but if equilibration is used, A is 5883d6581fbSHong Zhang overwritten by diag(R)*A*diag(C) and B by diag(R)*B 5893d6581fbSHong Zhang (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)." 5903d6581fbSHong Zhang We set 'options.Equil = NO' as default because additional space is needed for it. 5913d6581fbSHong Zhang */ 5923d6581fbSHong Zhang lu->options.Equil = NO; 593cffbb591SHong Zhang } else if (ftype == MAT_FACTOR_ILU) { 5940c74a584SJed Brown /* Set the default input options of ilu: */ 595d387c056SBarry Smith PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options)); 596cffbb591SHong Zhang } 5979ce50919SHong Zhang lu->options.PrintStat = NO; 5981a2e2f44SHong Zhang 5995d8b2efaSHong Zhang /* Initialize the statistics variables. */ 600d387c056SBarry Smith PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat)); 6018914a3f7SHong Zhang lu->lwork = 0; /* allocate space internally by system malloc */ 6029ce50919SHong Zhang 603ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); 6048afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,NULL);CHKERRQ(ierr); 6058914a3f7SHong Zhang ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr); 6062205254eSKarl Rupp if (flg) lu->options.ColPerm = (colperm_t)indx; 6078914a3f7SHong Zhang ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr); 6082205254eSKarl Rupp if (flg) lu->options.IterRefine = (IterRefine_t)indx; 6098afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,&set);CHKERRQ(ierr); 6108afaa268SBarry Smith if (set && flg) lu->options.SymmetricMode = YES; 6113cb270beSHong Zhang ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr); 6123cb270beSHong Zhang if (flg) lu->options.DiagPivotThresh = (double) real_input; 6138afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,&set);CHKERRQ(ierr); 6148afaa268SBarry Smith if (set && flg) lu->options.PivotGrowth = YES; 6158afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,&set);CHKERRQ(ierr); 6168afaa268SBarry Smith if (set && flg) lu->options.ConditionNumber = YES; 617d7ebd59bSHong Zhang ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr); 6182205254eSKarl Rupp if (flg) lu->options.RowPerm = (rowperm_t)indx; 6198afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,&set);CHKERRQ(ierr); 6208afaa268SBarry Smith if (set && flg) lu->options.ReplaceTinyPivot = YES; 6218afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,&set);CHKERRQ(ierr); 6228afaa268SBarry Smith if (set && flg) lu->options.PrintStat = YES; 6230298fd71SBarry Smith ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr); 6245fe6150dSHong Zhang if (lu->lwork > 0) { 625d87de817SBarry Smith /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/ 6265fe6150dSHong Zhang ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr); 6275fe6150dSHong Zhang } else if (lu->lwork != 0 && lu->lwork != -1) { 62877431f27SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork); 6298914a3f7SHong Zhang lu->lwork = 0; 6308914a3f7SHong Zhang } 631cffbb591SHong Zhang /* ilu options */ 6323cb270beSHong Zhang ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr); 6333cb270beSHong Zhang if (flg) lu->options.ILU_DropTol = (double) real_input; 6343cb270beSHong Zhang ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr); 6353cb270beSHong Zhang if (flg) lu->options.ILU_FillTol = (double) real_input; 6363cb270beSHong Zhang ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr); 6373cb270beSHong Zhang if (flg) lu->options.ILU_FillFactor = (double) real_input; 6380298fd71SBarry Smith ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr); 639cffbb591SHong Zhang ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr); 6402205254eSKarl Rupp if (flg) lu->options.ILU_Norm = (norm_t)indx; 641cffbb591SHong Zhang ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr); 6422205254eSKarl Rupp if (flg) lu->options.ILU_MILU = (milu_t)indx; 643245fece9SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 64438abfdaeSHong Zhang if (lu->options.Equil == YES) { 64538abfdaeSHong Zhang /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */ 64638abfdaeSHong Zhang ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr); 64738abfdaeSHong Zhang } 6489ce50919SHong Zhang 6495d8b2efaSHong Zhang /* Allocate spaces (notice sizes are for the transpose) */ 650785e854fSJed Brown ierr = PetscMalloc1(m,&lu->etree);CHKERRQ(ierr); 651785e854fSJed Brown ierr = PetscMalloc1(n,&lu->perm_r);CHKERRQ(ierr); 652785e854fSJed Brown ierr = PetscMalloc1(m,&lu->perm_c);CHKERRQ(ierr); 653785e854fSJed Brown ierr = PetscMalloc1(n,&lu->R);CHKERRQ(ierr); 654785e854fSJed Brown ierr = PetscMalloc1(m,&lu->C);CHKERRQ(ierr); 6555d8b2efaSHong Zhang 6565d8b2efaSHong Zhang /* create rhs and solution x without allocate space for .Store */ 6575d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX) 6583cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 659d387c056SBarry Smith PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE)); 660d387c056SBarry Smith PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE)); 6613cb270beSHong Zhang #else 662d387c056SBarry Smith PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE)); 663d387c056SBarry Smith PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE)); 6643cb270beSHong Zhang #endif 6653cb270beSHong Zhang #else 6663cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 667d387c056SBarry Smith PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE)); 668d387c056SBarry Smith PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE)); 6695d8b2efaSHong Zhang #else 670d387c056SBarry Smith PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE)); 671d387c056SBarry Smith PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE)); 6725d8b2efaSHong Zhang #endif 6733cb270beSHong Zhang #endif 6745d8b2efaSHong Zhang 675bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverPackage_C",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr); 676bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr); 677245fece9SBarry Smith B->data = lu; 67820be8e61SHong Zhang 679899d7b4fSKris Buschelman *F = B; 68014b396bbSKris Buschelman PetscFunctionReturn(0); 68114b396bbSKris Buschelman } 682d954db57SHong Zhang 68329b38603SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverPackageRegister_SuperLU(void) 68442c9c57cSBarry Smith { 68542c9c57cSBarry Smith PetscErrorCode ierr; 68642c9c57cSBarry Smith 68742c9c57cSBarry Smith PetscFunctionBegin; 68842c9c57cSBarry Smith ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ, MAT_FACTOR_LU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr); 68942c9c57cSBarry Smith ierr = MatSolverPackageRegister(MATSOLVERSUPERLU,MATSEQAIJ, MAT_FACTOR_ILU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr); 69042c9c57cSBarry Smith PetscFunctionReturn(0); 69142c9c57cSBarry Smith } 692