114b396bbSKris Buschelman 25a46d3faSBarry Smith /* -------------------------------------------------------------------- 35a46d3faSBarry Smith 45a46d3faSBarry Smith This file implements a subclass of the SeqAIJ matrix class that uses 5c2b89b5dSBarry Smith the SuperLU sparse solver. 614b396bbSKris Buschelman */ 714b396bbSKris Buschelman 85a46d3faSBarry Smith /* 95a46d3faSBarry Smith Defines the data structure for the base matrix type (SeqAIJ) 105a46d3faSBarry Smith */ 11c6db04a5SJed Brown #include <../src/mat/impls/aij/seq/aij.h> /*I "petscmat.h" I*/ 1214b396bbSKris Buschelman 135a46d3faSBarry Smith /* 145a46d3faSBarry Smith SuperLU include files 155a46d3faSBarry Smith */ 1614b396bbSKris Buschelman EXTERN_C_BEGIN 1714b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX) 183cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 193cb270beSHong Zhang #include <slu_cdefs.h> 203cb270beSHong Zhang #else 21c6db04a5SJed Brown #include <slu_zdefs.h> 223cb270beSHong Zhang #endif 233cb270beSHong Zhang #else 243cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 253cb270beSHong Zhang #include <slu_sdefs.h> 2614b396bbSKris Buschelman #else 27c6db04a5SJed Brown #include <slu_ddefs.h> 2814b396bbSKris Buschelman #endif 293cb270beSHong Zhang #endif 30c6db04a5SJed Brown #include <slu_util.h> 3114b396bbSKris Buschelman EXTERN_C_END 3214b396bbSKris Buschelman 335a46d3faSBarry Smith /* 34245fece9SBarry Smith This is the data that defines the SuperLU factored matrix type 355a46d3faSBarry Smith */ 3614b396bbSKris Buschelman typedef struct { 3775af56d4SHong Zhang SuperMatrix A,L,U,B,X; 3875af56d4SHong Zhang superlu_options_t options; 39da7a1d00SHong Zhang PetscInt *perm_c; /* column permutation vector */ 40da7a1d00SHong Zhang PetscInt *perm_r; /* row permutations from partial pivoting */ 41da7a1d00SHong Zhang PetscInt *etree; 42da7a1d00SHong Zhang PetscReal *R, *C; 4375af56d4SHong Zhang char equed[1]; 44da7a1d00SHong Zhang PetscInt lwork; 4575af56d4SHong Zhang void *work; 46da7a1d00SHong Zhang PetscReal rpg, rcond; 4775af56d4SHong Zhang mem_usage_t mem_usage; 4875af56d4SHong Zhang MatStructure flg; 495d8b2efaSHong Zhang SuperLUStat_t stat; 50cae5a23dSHong Zhang Mat A_dup; 51cae5a23dSHong Zhang PetscScalar *rhs_dup; 52c147c726SHong Zhang GlobalLU_t Glu; 532366e350SStefano Zampini PetscBool needconversion; 5414b396bbSKris Buschelman 5514b396bbSKris Buschelman /* Flag to clean up (non-global) SuperLU objects during Destroy */ 56ace3abfcSBarry Smith PetscBool CleanUpSuperLU; 57f0c56d0fSKris Buschelman } Mat_SuperLU; 5814b396bbSKris Buschelman 595a46d3faSBarry Smith /* 605a46d3faSBarry Smith Utility function 615a46d3faSBarry Smith */ 62860c79edSBarry Smith static PetscErrorCode MatView_Info_SuperLU(Mat A,PetscViewer viewer) 635a46d3faSBarry Smith { 64245fece9SBarry Smith Mat_SuperLU *lu= (Mat_SuperLU*)A->data; 655a46d3faSBarry Smith PetscErrorCode ierr; 665a46d3faSBarry Smith superlu_options_t options; 675a46d3faSBarry Smith 685a46d3faSBarry Smith PetscFunctionBegin; 695a46d3faSBarry Smith options = lu->options; 702205254eSKarl Rupp 715a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr); 725a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr); 73c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," ColPerm: %" PetscInt_FMT "\n",options.ColPerm);CHKERRQ(ierr); 74c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," IterRefine: %" PetscInt_FMT "\n",options.IterRefine);CHKERRQ(ierr); 755a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr); 765a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr); 775a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr); 785a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr); 79c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," RowPerm: %" PetscInt_FMT "\n",options.RowPerm);CHKERRQ(ierr); 805a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr); 815a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr); 82c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," lwork: %" PetscInt_FMT "\n",lu->lwork);CHKERRQ(ierr); 83d5f3da31SBarry Smith if (A->factortype == MAT_FACTOR_ILU) { 84cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr); 85cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr); 86cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr); 87c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," ILU_DropRule: %" PetscInt_FMT "\n",options.ILU_DropRule);CHKERRQ(ierr); 88c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," ILU_Norm: %" PetscInt_FMT "\n",options.ILU_Norm);CHKERRQ(ierr); 89c0aa6a63SJacob Faibussowitsch ierr = PetscViewerASCIIPrintf(viewer," ILU_MILU: %" PetscInt_FMT "\n",options.ILU_MILU);CHKERRQ(ierr); 90cffbb591SHong Zhang } 915a46d3faSBarry Smith PetscFunctionReturn(0); 925a46d3faSBarry Smith } 935a46d3faSBarry Smith 94245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x) 95245fece9SBarry Smith { 96245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)A->data; 97245fece9SBarry Smith const PetscScalar *barray; 98245fece9SBarry Smith PetscScalar *xarray; 99245fece9SBarry Smith PetscErrorCode ierr; 100245fece9SBarry Smith PetscInt info,i,n; 101245fece9SBarry Smith PetscReal ferr,berr; 102245fece9SBarry Smith static PetscBool cite = PETSC_FALSE; 103245fece9SBarry Smith 104245fece9SBarry Smith PetscFunctionBegin; 105245fece9SBarry Smith if (lu->lwork == -1) PetscFunctionReturn(0); 106245fece9SBarry Smith ierr = PetscCitationsRegister("@article{superlu99,\n author = {James W. Demmel and Stanley C. Eisenstat and\n John R. Gilbert and Xiaoye S. Li and Joseph W. H. Liu},\n title = {A supernodal approach to sparse partial pivoting},\n journal = {SIAM J. Matrix Analysis and Applications},\n year = {1999},\n volume = {20},\n number = {3},\n pages = {720-755}\n}\n",&cite);CHKERRQ(ierr); 107245fece9SBarry Smith 108245fece9SBarry Smith ierr = VecGetLocalSize(x,&n);CHKERRQ(ierr); 109245fece9SBarry Smith lu->B.ncol = 1; /* Set the number of right-hand side */ 110245fece9SBarry Smith if (lu->options.Equil && !lu->rhs_dup) { 111245fece9SBarry Smith /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */ 112245fece9SBarry Smith ierr = PetscMalloc1(n,&lu->rhs_dup);CHKERRQ(ierr); 113245fece9SBarry Smith } 114245fece9SBarry Smith if (lu->options.Equil) { 115245fece9SBarry Smith /* Copy b into rsh_dup */ 116245fece9SBarry Smith ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 117580bdb30SBarry Smith ierr = PetscArraycpy(lu->rhs_dup,barray,n);CHKERRQ(ierr); 118245fece9SBarry Smith ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 119245fece9SBarry Smith barray = lu->rhs_dup; 120245fece9SBarry Smith } else { 121245fece9SBarry Smith ierr = VecGetArrayRead(b,&barray);CHKERRQ(ierr); 122245fece9SBarry Smith } 123245fece9SBarry Smith ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 124245fece9SBarry Smith 125245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX) 126245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 127245fece9SBarry Smith ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray; 128245fece9SBarry Smith ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray; 129245fece9SBarry Smith #else 130245fece9SBarry Smith ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray; 131245fece9SBarry Smith ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray; 132245fece9SBarry Smith #endif 133245fece9SBarry Smith #else 134245fece9SBarry Smith ((DNformat*)lu->B.Store)->nzval = (void*)barray; 135245fece9SBarry Smith ((DNformat*)lu->X.Store)->nzval = xarray; 136245fece9SBarry Smith #endif 137245fece9SBarry Smith 138245fece9SBarry Smith lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ 139245fece9SBarry Smith if (A->factortype == MAT_FACTOR_LU) { 140245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX) 141245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 142245fece9SBarry Smith PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 143245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 144245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 145245fece9SBarry Smith #else 146245fece9SBarry Smith PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 147245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 148245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 149245fece9SBarry Smith #endif 150245fece9SBarry Smith #else 151245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 152245fece9SBarry Smith PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 153245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 154245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 155245fece9SBarry Smith #else 156245fece9SBarry Smith PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 157245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 158245fece9SBarry Smith &lu->Glu,&lu->mem_usage, &lu->stat, &info)); 159245fece9SBarry Smith #endif 160245fece9SBarry Smith #endif 161245fece9SBarry Smith } else if (A->factortype == MAT_FACTOR_ILU) { 162245fece9SBarry Smith #if defined(PETSC_USE_COMPLEX) 163245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 164245fece9SBarry Smith PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 165245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 166245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 167245fece9SBarry Smith #else 168245fece9SBarry Smith PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 169245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 170245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 171245fece9SBarry Smith #endif 172245fece9SBarry Smith #else 173245fece9SBarry Smith #if defined(PETSC_USE_REAL_SINGLE) 174245fece9SBarry Smith PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 175245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 176245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 177245fece9SBarry Smith #else 178245fece9SBarry Smith PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 179245fece9SBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 180245fece9SBarry Smith &lu->Glu, &lu->mem_usage, &lu->stat, &info)); 181245fece9SBarry Smith #endif 182245fece9SBarry Smith #endif 183245fece9SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 184245fece9SBarry Smith if (!lu->options.Equil) { 185245fece9SBarry Smith ierr = VecRestoreArrayRead(b,&barray);CHKERRQ(ierr); 186245fece9SBarry Smith } 187245fece9SBarry Smith ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 188245fece9SBarry Smith 189245fece9SBarry Smith if (!info || info == lu->A.ncol+1) { 190245fece9SBarry Smith if (lu->options.IterRefine) { 191245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n"); 192245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); 193245fece9SBarry Smith for (i = 0; i < 1; ++i) { 194245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr); 195245fece9SBarry Smith } 196245fece9SBarry Smith } 197245fece9SBarry Smith } else if (info > 0) { 198245fece9SBarry Smith if (lu->lwork == -1) { 199c0aa6a63SJacob Faibussowitsch ierr = PetscPrintf(PETSC_COMM_SELF," ** Estimated memory: %" PetscInt_FMT " bytes\n", info - lu->A.ncol); 200245fece9SBarry Smith } else { 201c0aa6a63SJacob Faibussowitsch ierr = PetscPrintf(PETSC_COMM_SELF," Warning: gssvx() returns info %" PetscInt_FMT "\n",info); 202245fece9SBarry Smith } 203*98921bdaSJacob Faibussowitsch } else if (info < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", info,-info); 204245fece9SBarry Smith 205245fece9SBarry Smith if (lu->options.PrintStat) { 206245fece9SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n"); 207245fece9SBarry Smith PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat)); 208245fece9SBarry Smith } 209245fece9SBarry Smith PetscFunctionReturn(0); 210245fece9SBarry Smith } 211245fece9SBarry Smith 212245fece9SBarry Smith PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x) 213245fece9SBarry Smith { 214245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)A->data; 215245fece9SBarry Smith PetscErrorCode ierr; 216245fece9SBarry Smith 217245fece9SBarry Smith PetscFunctionBegin; 218603e8f96SBarry Smith if (A->factorerrortype) { 219245fece9SBarry Smith ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr); 220245fece9SBarry Smith ierr = VecSetInf(x);CHKERRQ(ierr); 221245fece9SBarry Smith PetscFunctionReturn(0); 222245fece9SBarry Smith } 223245fece9SBarry Smith 224245fece9SBarry Smith lu->options.Trans = TRANS; 225245fece9SBarry Smith ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr); 226245fece9SBarry Smith PetscFunctionReturn(0); 227245fece9SBarry Smith } 228245fece9SBarry Smith 229245fece9SBarry Smith PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x) 230245fece9SBarry Smith { 231245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)A->data; 232245fece9SBarry Smith PetscErrorCode ierr; 233245fece9SBarry Smith 234245fece9SBarry Smith PetscFunctionBegin; 235603e8f96SBarry Smith if (A->factorerrortype) { 236245fece9SBarry Smith ierr = PetscInfo(A,"MatSolve is called with singular matrix factor, skip\n");CHKERRQ(ierr); 237245fece9SBarry Smith ierr = VecSetInf(x);CHKERRQ(ierr); 238245fece9SBarry Smith PetscFunctionReturn(0); 239245fece9SBarry Smith } 240245fece9SBarry Smith 241245fece9SBarry Smith lu->options.Trans = NOTRANS; 242245fece9SBarry Smith ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr); 243245fece9SBarry Smith PetscFunctionReturn(0); 244245fece9SBarry Smith } 245245fece9SBarry Smith 246245fece9SBarry Smith static PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info) 2475a46d3faSBarry Smith { 248245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)F->data; 249cae5a23dSHong Zhang Mat_SeqAIJ *aa; 2505a46d3faSBarry Smith PetscErrorCode ierr; 2515a46d3faSBarry Smith PetscInt sinfo; 2525a46d3faSBarry Smith PetscReal ferr, berr; 2535a46d3faSBarry Smith NCformat *Ustore; 2545a46d3faSBarry Smith SCformat *Lstore; 2555a46d3faSBarry Smith 2565a46d3faSBarry Smith PetscFunctionBegin; 2575a46d3faSBarry Smith if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */ 2585a46d3faSBarry Smith lu->options.Fact = SamePattern; 2595a46d3faSBarry Smith /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */ 2605a46d3faSBarry Smith Destroy_SuperMatrix_Store(&lu->A); 2612366e350SStefano Zampini if (lu->A_dup) { 262cae5a23dSHong Zhang ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 263cae5a23dSHong Zhang } 2642366e350SStefano Zampini 2655a46d3faSBarry Smith if (lu->lwork >= 0) { 266d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L)); 267d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U)); 2685a46d3faSBarry Smith lu->options.Fact = SamePattern; 2695a46d3faSBarry Smith } 2705a46d3faSBarry Smith } 2715a46d3faSBarry Smith 2725a46d3faSBarry Smith /* Create the SuperMatrix for lu->A=A^T: 2735a46d3faSBarry Smith Since SuperLU likes column-oriented matrices,we pass it the transpose, 2745a46d3faSBarry Smith and then solve A^T X = B in MatSolve(). */ 2752366e350SStefano Zampini if (lu->A_dup) { 276cae5a23dSHong Zhang aa = (Mat_SeqAIJ*)(lu->A_dup)->data; 277cae5a23dSHong Zhang } else { 278cae5a23dSHong Zhang aa = (Mat_SeqAIJ*)(A)->data; 279cae5a23dSHong Zhang } 2805a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX) 2813cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 282d387c056SBarry Smith PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE)); 2833cb270beSHong Zhang #else 284d387c056SBarry Smith PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE)); 2853cb270beSHong Zhang #endif 2863cb270beSHong Zhang #else 2873cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 288d387c056SBarry Smith PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE)); 2895a46d3faSBarry Smith #else 290d387c056SBarry Smith PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE)); 2915a46d3faSBarry Smith #endif 2923cb270beSHong Zhang #endif 2935a46d3faSBarry Smith 2945a46d3faSBarry Smith /* Numerical factorization */ 2955a46d3faSBarry Smith lu->B.ncol = 0; /* Indicate not to solve the system */ 296d5f3da31SBarry Smith if (F->factortype == MAT_FACTOR_LU) { 2975a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX) 2983cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 299d387c056SBarry Smith PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 3003cb270beSHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 3015db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3023cb270beSHong Zhang #else 303d387c056SBarry Smith PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 3045a46d3faSBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 3055db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3063cb270beSHong Zhang #endif 3073cb270beSHong Zhang #else 3083cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 309d387c056SBarry Smith PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 3103cb270beSHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 3115db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3125a46d3faSBarry Smith #else 313d387c056SBarry Smith PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 3145a46d3faSBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 315c147c726SHong Zhang &lu->Glu,&lu->mem_usage, &lu->stat, &sinfo)); 3165a46d3faSBarry Smith #endif 3173cb270beSHong Zhang #endif 318d5f3da31SBarry Smith } else if (F->factortype == MAT_FACTOR_ILU) { 319cffbb591SHong Zhang /* Compute the incomplete factorization, condition number and pivot growth */ 320cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX) 3213cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 322d387c056SBarry Smith PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C, 3233cb270beSHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 3245db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3253cb270beSHong Zhang #else 326d387c056SBarry Smith PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C, 327cffbb591SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 3285db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 3293cb270beSHong Zhang #endif 3303cb270beSHong Zhang #else 3313cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 332d387c056SBarry Smith PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 3333cb270beSHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 3345db81dd2SSatish Balay &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 335cffbb591SHong Zhang #else 336d387c056SBarry Smith PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 337cffbb591SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 338c147c726SHong Zhang &lu->Glu, &lu->mem_usage, &lu->stat, &sinfo)); 339cffbb591SHong Zhang #endif 3403cb270beSHong Zhang #endif 341f23aa3ddSBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 3425a46d3faSBarry Smith if (!sinfo || sinfo == lu->A.ncol+1) { 3432205254eSKarl Rupp if (lu->options.PivotGrowth) { 344675d1226SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," Recip. pivot growth = %e\n", lu->rpg);CHKERRQ(ierr); 3452205254eSKarl Rupp } 3462205254eSKarl Rupp if (lu->options.ConditionNumber) { 347675d1226SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," Recip. condition number = %e\n", lu->rcond);CHKERRQ(ierr); 3482205254eSKarl Rupp } 3495a46d3faSBarry Smith } else if (sinfo > 0) { 350675d1226SHong Zhang if (A->erroriffailure) { 351*98921bdaSJacob Faibussowitsch SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %" PetscInt_FMT,sinfo); 352675d1226SHong Zhang } else { 353675d1226SHong Zhang if (sinfo <= lu->A.ncol) { 354675d1226SHong Zhang if (lu->options.ILU_FillTol == 0.0) { 355603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 356675d1226SHong Zhang } 357c0aa6a63SJacob Faibussowitsch ierr = PetscInfo2(F,"Number of zero pivots %" PetscInt_FMT ", ILU_FillTol %g\n",sinfo,lu->options.ILU_FillTol);CHKERRQ(ierr); 358675d1226SHong Zhang } else if (sinfo == lu->A.ncol + 1) { 359675d1226SHong Zhang /* 360675d1226SHong Zhang U is nonsingular, but RCOND is less than machine 361675d1226SHong Zhang precision, meaning that the matrix is singular to 362675d1226SHong Zhang working precision. Nevertheless, the solution and 363675d1226SHong Zhang error bounds are computed because there are a number 364675d1226SHong Zhang of situations where the computed solution can be more 365675d1226SHong Zhang accurate than the value of RCOND would suggest. 366675d1226SHong Zhang */ 367c0aa6a63SJacob Faibussowitsch ierr = PetscInfo1(F,"Matrix factor U is nonsingular, but is singular to working precision. The solution is computed. info %" PetscInt_FMT,sinfo);CHKERRQ(ierr); 368675d1226SHong Zhang } else { /* sinfo > lu->A.ncol + 1 */ 369603e8f96SBarry Smith F->factorerrortype = MAT_FACTOR_OUTMEMORY; 370c0aa6a63SJacob Faibussowitsch ierr = PetscInfo1(F,"Number of bytes allocated when memory allocation fails %" PetscInt_FMT "\n",sinfo);CHKERRQ(ierr); 371675d1226SHong Zhang } 372675d1226SHong Zhang } 373*98921bdaSJacob Faibussowitsch } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %" PetscInt_FMT ", the %" PetscInt_FMT "-th argument in gssvx() had an illegal value", sinfo,-sinfo); 3745a46d3faSBarry Smith 3755a46d3faSBarry Smith if (lu->options.PrintStat) { 376675d1226SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");CHKERRQ(ierr); 377d387c056SBarry Smith PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat)); 3785a46d3faSBarry Smith Lstore = (SCformat*) lu->L.Store; 3795a46d3faSBarry Smith Ustore = (NCformat*) lu->U.Store; 380c0aa6a63SJacob Faibussowitsch ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor L = %" PetscInt_FMT "\n", Lstore->nnz); 381c0aa6a63SJacob Faibussowitsch ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor U = %" PetscInt_FMT "\n", Ustore->nnz); 382c0aa6a63SJacob Faibussowitsch ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in L+U = %" PetscInt_FMT "\n", Lstore->nnz + Ustore->nnz - lu->A.ncol); 3836da386baSHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," L\\U MB %.3f\ttotal MB needed %.3f\n", 3846da386baSHong Zhang lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6); 3855a46d3faSBarry Smith } 3865a46d3faSBarry Smith 3875a46d3faSBarry Smith lu->flg = SAME_NONZERO_PATTERN; 3881d5ca7f3SHong Zhang F->ops->solve = MatSolve_SuperLU; 3891d5ca7f3SHong Zhang F->ops->solvetranspose = MatSolveTranspose_SuperLU; 3901aef8b4cSStefano Zampini F->ops->matsolve = NULL; 3915a46d3faSBarry Smith PetscFunctionReturn(0); 3925a46d3faSBarry Smith } 3935a46d3faSBarry Smith 394245fece9SBarry Smith static PetscErrorCode MatDestroy_SuperLU(Mat A) 39514b396bbSKris Buschelman { 396dfbe8321SBarry Smith PetscErrorCode ierr; 397245fece9SBarry Smith Mat_SuperLU *lu=(Mat_SuperLU*)A->data; 39814b396bbSKris Buschelman 39914b396bbSKris Buschelman PetscFunctionBegin; 400245fece9SBarry Smith if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */ 401d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A)); 402d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B)); 403d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X)); 404d387c056SBarry Smith PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat)); 4050e742b69SHong Zhang if (lu->lwork >= 0) { 406d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L)); 407d387c056SBarry Smith PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U)); 4080e742b69SHong Zhang } 4090e742b69SHong Zhang } 4109ce50919SHong Zhang ierr = PetscFree(lu->etree);CHKERRQ(ierr); 4119ce50919SHong Zhang ierr = PetscFree(lu->perm_r);CHKERRQ(ierr); 4129ce50919SHong Zhang ierr = PetscFree(lu->perm_c);CHKERRQ(ierr); 4139ce50919SHong Zhang ierr = PetscFree(lu->R);CHKERRQ(ierr); 4149ce50919SHong Zhang ierr = PetscFree(lu->C);CHKERRQ(ierr); 415bf0cc555SLisandro Dalcin ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr); 416bf0cc555SLisandro Dalcin ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr); 417245fece9SBarry Smith ierr = PetscFree(A->data);CHKERRQ(ierr); 4180e742b69SHong Zhang 419d954db57SHong Zhang /* clear composed functions */ 4203ca39a21SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 421bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluSetILUDropTol_C",NULL);CHKERRQ(ierr); 42214b396bbSKris Buschelman PetscFunctionReturn(0); 42314b396bbSKris Buschelman } 42414b396bbSKris Buschelman 425245fece9SBarry Smith static PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer) 42614b396bbSKris Buschelman { 427dfbe8321SBarry Smith PetscErrorCode ierr; 428ace3abfcSBarry Smith PetscBool iascii; 42914b396bbSKris Buschelman PetscViewerFormat format; 43014b396bbSKris Buschelman 43114b396bbSKris Buschelman PetscFunctionBegin; 432251f4c67SDmitry Karpeev ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 43332077d6dSBarry Smith if (iascii) { 43414b396bbSKris Buschelman ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 4352f59403fSHong Zhang if (format == PETSC_VIEWER_ASCII_INFO) { 436860c79edSBarry Smith ierr = MatView_Info_SuperLU(A,viewer);CHKERRQ(ierr); 43714b396bbSKris Buschelman } 43814b396bbSKris Buschelman } 43914b396bbSKris Buschelman PetscFunctionReturn(0); 44014b396bbSKris Buschelman } 44114b396bbSKris Buschelman 442e0b74bf9SHong Zhang PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X) 443e0b74bf9SHong Zhang { 444245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)A->data; 445cd723cd1SBarry Smith PetscBool flg; 446cd723cd1SBarry Smith PetscErrorCode ierr; 447e0b74bf9SHong Zhang 448e0b74bf9SHong Zhang PetscFunctionBegin; 4490298fd71SBarry Smith ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 450ce94432eSBarry Smith if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 4510298fd71SBarry Smith ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 452ce94432eSBarry Smith if (!flg) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 4532205254eSKarl Rupp lu->options.Trans = TRANS; 454e0b74bf9SHong Zhang SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet"); 455e0b74bf9SHong Zhang PetscFunctionReturn(0); 456e0b74bf9SHong Zhang } 457e0b74bf9SHong Zhang 458245fece9SBarry Smith static PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 45914b396bbSKris Buschelman { 460245fece9SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)(F->data); 4612366e350SStefano Zampini PetscErrorCode ierr; 462b24902e0SBarry Smith 463b24902e0SBarry Smith PetscFunctionBegin; 464b24902e0SBarry Smith lu->flg = DIFFERENT_NONZERO_PATTERN; 465b24902e0SBarry Smith lu->CleanUpSuperLU = PETSC_TRUE; 4661d5ca7f3SHong Zhang F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU; 4672366e350SStefano Zampini 4682366e350SStefano Zampini /* if we are here, the nonzero pattern has changed unless the user explicitly called MatLUFactorSymbolic */ 4692366e350SStefano Zampini ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr); 4702366e350SStefano Zampini if (lu->needconversion) { 4712366e350SStefano Zampini ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&lu->A_dup);CHKERRQ(ierr); 4722366e350SStefano Zampini } 4732366e350SStefano Zampini if (lu->options.Equil == YES && !lu->A_dup) { /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */ 4742366e350SStefano Zampini ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr); 4752366e350SStefano Zampini } 476b24902e0SBarry Smith PetscFunctionReturn(0); 477b24902e0SBarry Smith } 478b24902e0SBarry Smith 479b2573a8aSBarry Smith static PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol) 4805ccb76cbSHong Zhang { 481245fece9SBarry Smith Mat_SuperLU *lu= (Mat_SuperLU*)F->data; 4825ccb76cbSHong Zhang 4835ccb76cbSHong Zhang PetscFunctionBegin; 4845ccb76cbSHong Zhang lu->options.ILU_DropTol = dtol; 4855ccb76cbSHong Zhang PetscFunctionReturn(0); 4865ccb76cbSHong Zhang } 4875ccb76cbSHong Zhang 4885ccb76cbSHong Zhang /*@ 4895ccb76cbSHong Zhang MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance 4905ccb76cbSHong Zhang Logically Collective on Mat 4915ccb76cbSHong Zhang 4925ccb76cbSHong Zhang Input Parameters: 4935ccb76cbSHong Zhang + F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface 4945ccb76cbSHong Zhang - dtol - drop tolerance 4955ccb76cbSHong Zhang 4965ccb76cbSHong Zhang Options Database: 4975ccb76cbSHong Zhang . -mat_superlu_ilu_droptol <dtol> 4985ccb76cbSHong Zhang 4995ccb76cbSHong Zhang Level: beginner 5005ccb76cbSHong Zhang 50196a0c994SBarry Smith References: 50296a0c994SBarry Smith . SuperLU Users' Guide 5035ccb76cbSHong Zhang 5045ccb76cbSHong Zhang .seealso: MatGetFactor() 5055ccb76cbSHong Zhang @*/ 5065ccb76cbSHong Zhang PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol) 5075ccb76cbSHong Zhang { 5085ccb76cbSHong Zhang PetscErrorCode ierr; 5095ccb76cbSHong Zhang 5105ccb76cbSHong Zhang PetscFunctionBegin; 5115ccb76cbSHong Zhang PetscValidHeaderSpecific(F,MAT_CLASSID,1); 51269fbec6eSBarry Smith PetscValidLogicalCollectiveReal(F,dtol,2); 5135ccb76cbSHong Zhang ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr); 5145ccb76cbSHong Zhang PetscFunctionReturn(0); 5155ccb76cbSHong Zhang } 5165ccb76cbSHong Zhang 517ea799195SBarry Smith PetscErrorCode MatFactorGetSolverType_seqaij_superlu(Mat A,MatSolverType *type) 51835bd34faSBarry Smith { 51935bd34faSBarry Smith PetscFunctionBegin; 5202692d6eeSBarry Smith *type = MATSOLVERSUPERLU; 52135bd34faSBarry Smith PetscFunctionReturn(0); 52235bd34faSBarry Smith } 52335bd34faSBarry Smith 524b24902e0SBarry Smith /*MC 525ba992d64SSatish Balay MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices 526b24902e0SBarry Smith via the external package SuperLU. 527b24902e0SBarry Smith 528e2e64c6bSBarry Smith Use ./configure --download-superlu to have PETSc installed with SuperLU 529b24902e0SBarry Smith 5303ca39a21SBarry Smith Use -pc_type lu -pc_factor_mat_solver_type superlu to use this direct solver 531c2b89b5dSBarry Smith 532b24902e0SBarry Smith Options Database Keys: 533e08999f5SMatthew G Knepley + -mat_superlu_equil <FALSE> - Equil (None) 534e08999f5SMatthew G Knepley . -mat_superlu_colperm <COLAMD> - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD 535e08999f5SMatthew G Knepley . -mat_superlu_iterrefine <NOREFINE> - (choose one of) NOREFINE SINGLE DOUBLE EXTRA 536e08999f5SMatthew G Knepley . -mat_superlu_symmetricmode: <FALSE> - SymmetricMode (None) 537e08999f5SMatthew G Knepley . -mat_superlu_diagpivotthresh <1> - DiagPivotThresh (None) 538e08999f5SMatthew G Knepley . -mat_superlu_pivotgrowth <FALSE> - PivotGrowth (None) 539e08999f5SMatthew G Knepley . -mat_superlu_conditionnumber <FALSE> - ConditionNumber (None) 540e08999f5SMatthew G Knepley . -mat_superlu_rowperm <NOROWPERM> - (choose one of) NOROWPERM LargeDiag 541e08999f5SMatthew G Knepley . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None) 542e08999f5SMatthew G Knepley . -mat_superlu_printstat <FALSE> - PrintStat (None) 543e08999f5SMatthew G Knepley . -mat_superlu_lwork <0> - size of work array in bytes used by factorization (None) 544e08999f5SMatthew G Knepley . -mat_superlu_ilu_droptol <0> - ILU_DropTol (None) 545e08999f5SMatthew G Knepley . -mat_superlu_ilu_filltol <0> - ILU_FillTol (None) 546e08999f5SMatthew G Knepley . -mat_superlu_ilu_fillfactor <0> - ILU_FillFactor (None) 547e08999f5SMatthew G Knepley . -mat_superlu_ilu_droprull <0> - ILU_DropRule (None) 548e08999f5SMatthew G Knepley . -mat_superlu_ilu_norm <0> - ILU_Norm (None) 549e08999f5SMatthew G Knepley - -mat_superlu_ilu_milu <0> - ILU_MILU (None) 550b24902e0SBarry Smith 55195452b02SPatrick Sanan Notes: 55295452b02SPatrick Sanan Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves 5535c9eb25fSBarry Smith 5542c7c0729SBarry Smith Cannot currently use ordering provided by PETSc. 5552c7c0729SBarry Smith 556b24902e0SBarry Smith Level: beginner 557b24902e0SBarry Smith 5583ca39a21SBarry Smith .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverType(), MatSolverType 559b24902e0SBarry Smith M*/ 560b24902e0SBarry Smith 561db87b0f2SBarry Smith static PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F) 562b24902e0SBarry Smith { 56314b396bbSKris Buschelman Mat B; 564f0c56d0fSKris Buschelman Mat_SuperLU *lu; 5656849ba73SBarry Smith PetscErrorCode ierr; 5665d8b2efaSHong Zhang PetscInt indx,m=A->rmap->n,n=A->cmap->n; 5678afaa268SBarry Smith PetscBool flg,set; 5683cb270beSHong Zhang PetscReal real_input; 5695ca28756SHong Zhang const char *colperm[] ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */ 5705ca28756SHong Zhang const char *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"}; 5715ca28756SHong Zhang const char *rowperm[] ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */ 57214b396bbSKris Buschelman 57314b396bbSKris Buschelman PetscFunctionBegin; 574ce94432eSBarry Smith ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 575d0f46423SBarry Smith ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 576245fece9SBarry Smith ierr = PetscStrallocpy("superlu",&((PetscObject)B)->type_name);CHKERRQ(ierr); 577245fece9SBarry Smith ierr = MatSetUp(B);CHKERRQ(ierr); 57866e17bc3SBarry Smith B->trivialsymbolic = PETSC_TRUE; 579cffbb591SHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) { 580b24902e0SBarry Smith B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU; 581cffbb591SHong Zhang B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU; 582b3a44c85SBarry Smith } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported"); 583cffbb591SHong Zhang 58400c67f3bSHong Zhang ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 58500c67f3bSHong Zhang ierr = PetscStrallocpy(MATSOLVERSUPERLU,&B->solvertype);CHKERRQ(ierr); 58600c67f3bSHong Zhang 587b9c12af5SBarry Smith B->ops->getinfo = MatGetInfo_External; 588b24902e0SBarry Smith B->ops->destroy = MatDestroy_SuperLU; 5893519fcdcSHong Zhang B->ops->view = MatView_SuperLU; 590d5f3da31SBarry Smith B->factortype = ftype; 59194b7f48cSBarry Smith B->assembled = PETSC_TRUE; /* required by -ksp_view */ 5925c9eb25fSBarry Smith B->preallocated = PETSC_TRUE; 59314b396bbSKris Buschelman 594b00a9115SJed Brown ierr = PetscNewLog(B,&lu);CHKERRQ(ierr); 595cae5a23dSHong Zhang 596cffbb591SHong Zhang if (ftype == MAT_FACTOR_LU) { 5979ce50919SHong Zhang set_default_options(&lu->options); 5983d6581fbSHong Zhang /* Comments from SuperLU_4.0/SRC/dgssvx.c: 5993d6581fbSHong Zhang "Whether or not the system will be equilibrated depends on the 6003d6581fbSHong Zhang scaling of the matrix A, but if equilibration is used, A is 6013d6581fbSHong Zhang overwritten by diag(R)*A*diag(C) and B by diag(R)*B 6023d6581fbSHong Zhang (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)." 6033d6581fbSHong Zhang We set 'options.Equil = NO' as default because additional space is needed for it. 6043d6581fbSHong Zhang */ 6053d6581fbSHong Zhang lu->options.Equil = NO; 606cffbb591SHong Zhang } else if (ftype == MAT_FACTOR_ILU) { 6070c74a584SJed Brown /* Set the default input options of ilu: */ 608d387c056SBarry Smith PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options)); 609cffbb591SHong Zhang } 6109ce50919SHong Zhang lu->options.PrintStat = NO; 6111a2e2f44SHong Zhang 6125d8b2efaSHong Zhang /* Initialize the statistics variables. */ 613d387c056SBarry Smith PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat)); 6148914a3f7SHong Zhang lu->lwork = 0; /* allocate space internally by system malloc */ 6159ce50919SHong Zhang 616ce94432eSBarry Smith ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); 6178afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,NULL);CHKERRQ(ierr); 6188914a3f7SHong Zhang ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr); 6192205254eSKarl Rupp if (flg) lu->options.ColPerm = (colperm_t)indx; 6208914a3f7SHong Zhang ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr); 6212205254eSKarl Rupp if (flg) lu->options.IterRefine = (IterRefine_t)indx; 6228afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,&set);CHKERRQ(ierr); 6238afaa268SBarry Smith if (set && flg) lu->options.SymmetricMode = YES; 6243cb270beSHong Zhang ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr); 6253cb270beSHong Zhang if (flg) lu->options.DiagPivotThresh = (double) real_input; 6268afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,&set);CHKERRQ(ierr); 6278afaa268SBarry Smith if (set && flg) lu->options.PivotGrowth = YES; 6288afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,&set);CHKERRQ(ierr); 6298afaa268SBarry Smith if (set && flg) lu->options.ConditionNumber = YES; 630d7ebd59bSHong Zhang ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr); 6312205254eSKarl Rupp if (flg) lu->options.RowPerm = (rowperm_t)indx; 6328afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,&set);CHKERRQ(ierr); 6338afaa268SBarry Smith if (set && flg) lu->options.ReplaceTinyPivot = YES; 6348afaa268SBarry Smith ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,&set);CHKERRQ(ierr); 6358afaa268SBarry Smith if (set && flg) lu->options.PrintStat = YES; 6360298fd71SBarry Smith ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr); 6375fe6150dSHong Zhang if (lu->lwork > 0) { 638d87de817SBarry Smith /* lwork is in bytes, hence PetscMalloc() is used here, not PetscMalloc1()*/ 6395fe6150dSHong Zhang ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr); 6405fe6150dSHong Zhang } else if (lu->lwork != 0 && lu->lwork != -1) { 641c0aa6a63SJacob Faibussowitsch ierr = PetscPrintf(PETSC_COMM_SELF," Warning: lwork %" PetscInt_FMT " is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork); 6428914a3f7SHong Zhang lu->lwork = 0; 6438914a3f7SHong Zhang } 644cffbb591SHong Zhang /* ilu options */ 6453cb270beSHong Zhang ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr); 6463cb270beSHong Zhang if (flg) lu->options.ILU_DropTol = (double) real_input; 6473cb270beSHong Zhang ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr); 6483cb270beSHong Zhang if (flg) lu->options.ILU_FillTol = (double) real_input; 6493cb270beSHong Zhang ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr); 6503cb270beSHong Zhang if (flg) lu->options.ILU_FillFactor = (double) real_input; 6510298fd71SBarry Smith ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr); 652cffbb591SHong Zhang ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr); 6532205254eSKarl Rupp if (flg) lu->options.ILU_Norm = (norm_t)indx; 654cffbb591SHong Zhang ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr); 6552205254eSKarl Rupp if (flg) lu->options.ILU_MILU = (milu_t)indx; 656245fece9SBarry Smith ierr = PetscOptionsEnd();CHKERRQ(ierr); 6579ce50919SHong Zhang 6585d8b2efaSHong Zhang /* Allocate spaces (notice sizes are for the transpose) */ 659785e854fSJed Brown ierr = PetscMalloc1(m,&lu->etree);CHKERRQ(ierr); 660785e854fSJed Brown ierr = PetscMalloc1(n,&lu->perm_r);CHKERRQ(ierr); 661785e854fSJed Brown ierr = PetscMalloc1(m,&lu->perm_c);CHKERRQ(ierr); 662785e854fSJed Brown ierr = PetscMalloc1(n,&lu->R);CHKERRQ(ierr); 663785e854fSJed Brown ierr = PetscMalloc1(m,&lu->C);CHKERRQ(ierr); 6645d8b2efaSHong Zhang 6655d8b2efaSHong Zhang /* create rhs and solution x without allocate space for .Store */ 6665d8b2efaSHong Zhang #if defined(PETSC_USE_COMPLEX) 6673cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 668d387c056SBarry Smith PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE)); 669d387c056SBarry Smith PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE)); 6703cb270beSHong Zhang #else 671d387c056SBarry Smith PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE)); 672d387c056SBarry Smith PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE)); 6733cb270beSHong Zhang #endif 6743cb270beSHong Zhang #else 6753cb270beSHong Zhang #if defined(PETSC_USE_REAL_SINGLE) 676d387c056SBarry Smith PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE)); 677d387c056SBarry Smith PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE)); 6785d8b2efaSHong Zhang #else 679d387c056SBarry Smith PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE)); 680d387c056SBarry Smith PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE)); 6815d8b2efaSHong Zhang #endif 6823cb270beSHong Zhang #endif 6835d8b2efaSHong Zhang 6843ca39a21SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_seqaij_superlu);CHKERRQ(ierr); 685bdf89e91SBarry Smith ierr = PetscObjectComposeFunction((PetscObject)B,"MatSuperluSetILUDropTol_C",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr); 686245fece9SBarry Smith B->data = lu; 68720be8e61SHong Zhang 688899d7b4fSKris Buschelman *F = B; 68914b396bbSKris Buschelman PetscFunctionReturn(0); 69014b396bbSKris Buschelman } 691d954db57SHong Zhang 6922366e350SStefano Zampini static PetscErrorCode MatGetFactor_seqsell_superlu(Mat A,MatFactorType ftype,Mat *F) 6932366e350SStefano Zampini { 6942366e350SStefano Zampini Mat_SuperLU *lu; 6952366e350SStefano Zampini PetscErrorCode ierr; 6962366e350SStefano Zampini 6972366e350SStefano Zampini PetscFunctionBegin; 6982366e350SStefano Zampini ierr = MatGetFactor_seqaij_superlu(A,ftype,F);CHKERRQ(ierr); 6992366e350SStefano Zampini lu = (Mat_SuperLU*)((*F)->data); 7002366e350SStefano Zampini lu->needconversion = PETSC_TRUE; 7012366e350SStefano Zampini PetscFunctionReturn(0); 7022366e350SStefano Zampini } 7032366e350SStefano Zampini 7043ca39a21SBarry Smith PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU(void) 70542c9c57cSBarry Smith { 70642c9c57cSBarry Smith PetscErrorCode ierr; 70742c9c57cSBarry Smith 70842c9c57cSBarry Smith PetscFunctionBegin; 7093ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr); 7103ca39a21SBarry Smith ierr = MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQAIJ,MAT_FACTOR_ILU,MatGetFactor_seqaij_superlu);CHKERRQ(ierr); 7112366e350SStefano Zampini ierr = MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQSELL,MAT_FACTOR_LU,MatGetFactor_seqsell_superlu);CHKERRQ(ierr); 7122366e350SStefano Zampini ierr = MatSolverTypeRegister(MATSOLVERSUPERLU,MATSEQSELL,MAT_FACTOR_ILU,MatGetFactor_seqsell_superlu);CHKERRQ(ierr); 71342c9c57cSBarry Smith PetscFunctionReturn(0); 71442c9c57cSBarry Smith } 715