1be1d678aSKris Buschelman #define PETSCMAT_DLL 214b396bbSKris Buschelman 35a46d3faSBarry Smith /* -------------------------------------------------------------------- 45a46d3faSBarry Smith 55a46d3faSBarry Smith This file implements a subclass of the SeqAIJ matrix class that uses 65a46d3faSBarry Smith the SuperLU 3.0 sparse solver. You can use this as a starting point for 75a46d3faSBarry Smith implementing your own subclass of a PETSc matrix class. 85a46d3faSBarry Smith 95a46d3faSBarry Smith This demonstrates a way to make an implementation inheritence of a PETSc 105a46d3faSBarry Smith matrix type. This means constructing a new matrix type (SuperLU) by changing some 115a46d3faSBarry Smith of the methods of the previous type (SeqAIJ), adding additional data, and possibly 125a46d3faSBarry Smith additional method. (See any book on object oriented programming). 1314b396bbSKris Buschelman */ 1414b396bbSKris Buschelman 155a46d3faSBarry Smith /* 165a46d3faSBarry Smith Defines the data structure for the base matrix type (SeqAIJ) 175a46d3faSBarry Smith */ 187c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/aij.h" 1914b396bbSKris Buschelman 205a46d3faSBarry Smith /* 215a46d3faSBarry Smith SuperLU include files 225a46d3faSBarry Smith */ 2314b396bbSKris Buschelman EXTERN_C_BEGIN 2414b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX) 2566f57492SHong Zhang #include "slu_zdefs.h" 2614b396bbSKris Buschelman #else 2766f57492SHong Zhang #include "slu_ddefs.h" 2814b396bbSKris Buschelman #endif 2966f57492SHong Zhang #include "slu_util.h" 3014b396bbSKris Buschelman EXTERN_C_END 3114b396bbSKris Buschelman 325a46d3faSBarry Smith /* 335a46d3faSBarry Smith This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type 345a46d3faSBarry Smith */ 3514b396bbSKris Buschelman typedef struct { 3675af56d4SHong Zhang SuperMatrix A,L,U,B,X; 3775af56d4SHong Zhang superlu_options_t options; 38da7a1d00SHong Zhang PetscInt *perm_c; /* column permutation vector */ 39da7a1d00SHong Zhang PetscInt *perm_r; /* row permutations from partial pivoting */ 40da7a1d00SHong Zhang PetscInt *etree; 41da7a1d00SHong Zhang PetscReal *R, *C; 4275af56d4SHong Zhang char equed[1]; 43da7a1d00SHong Zhang PetscInt lwork; 4475af56d4SHong Zhang void *work; 45da7a1d00SHong Zhang PetscReal rpg, rcond; 4675af56d4SHong Zhang mem_usage_t mem_usage; 4775af56d4SHong Zhang MatStructure flg; 4814b396bbSKris Buschelman 4914b396bbSKris Buschelman /* Flag to clean up (non-global) SuperLU objects during Destroy */ 5014b396bbSKris Buschelman PetscTruth CleanUpSuperLU; 51f0c56d0fSKris Buschelman } Mat_SuperLU; 5214b396bbSKris Buschelman 53e0e586b9SHong Zhang extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer); 540481f469SBarry Smith extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo *); 55e0e586b9SHong Zhang extern PetscErrorCode MatDestroy_SuperLU(Mat); 56e0e586b9SHong Zhang extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer); 57e0e586b9SHong Zhang extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType); 58e0e586b9SHong Zhang extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec); 59e0e586b9SHong Zhang extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec); 600481f469SBarry Smith extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*); 61e0e586b9SHong Zhang extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat *); 62e0e586b9SHong Zhang 635a46d3faSBarry Smith /* 645a46d3faSBarry Smith Utility function 655a46d3faSBarry Smith */ 665a46d3faSBarry Smith #undef __FUNCT__ 675a46d3faSBarry Smith #define __FUNCT__ "MatFactorInfo_SuperLU" 685a46d3faSBarry Smith PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer) 695a46d3faSBarry Smith { 705a46d3faSBarry Smith Mat_SuperLU *lu= (Mat_SuperLU*)A->spptr; 715a46d3faSBarry Smith PetscErrorCode ierr; 725a46d3faSBarry Smith superlu_options_t options; 735a46d3faSBarry Smith 745a46d3faSBarry Smith PetscFunctionBegin; 755a46d3faSBarry Smith /* check if matrix is superlu_dist type */ 765a46d3faSBarry Smith if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0); 775a46d3faSBarry Smith 785a46d3faSBarry Smith options = lu->options; 795a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr); 805a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," Equil: %s\n",(options.Equil != NO) ? "YES": "NO");CHKERRQ(ierr); 815a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr); 825a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr); 835a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES": "NO");CHKERRQ(ierr); 845a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr); 855a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES": "NO");CHKERRQ(ierr); 865a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES": "NO");CHKERRQ(ierr); 875a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr); 885a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES": "NO");CHKERRQ(ierr); 895a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," PrintStat: %s\n",(options.PrintStat != NO) ? "YES": "NO");CHKERRQ(ierr); 905a46d3faSBarry Smith ierr = PetscViewerASCIIPrintf(viewer," lwork: %D\n",lu->lwork);CHKERRQ(ierr); 91*cffbb591SHong Zhang if (A->factor == MAT_FACTOR_ILU){ 92*cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr); 93*cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr); 94*cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr); 95*cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr); 96*cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr); 97*cffbb591SHong Zhang ierr = PetscViewerASCIIPrintf(viewer," ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr); 98*cffbb591SHong Zhang } 995a46d3faSBarry Smith PetscFunctionReturn(0); 1005a46d3faSBarry Smith } 1015a46d3faSBarry Smith 1025a46d3faSBarry Smith /* 1035a46d3faSBarry Smith These are the methods provided to REPLACE the corresponding methods of the 1045a46d3faSBarry Smith SeqAIJ matrix class. Other methods of SeqAIJ are not replaced 1055a46d3faSBarry Smith */ 1065a46d3faSBarry Smith #undef __FUNCT__ 1075a46d3faSBarry Smith #define __FUNCT__ "MatLUFactorNumeric_SuperLU" 1080481f469SBarry Smith PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info) 1095a46d3faSBarry Smith { 1105a46d3faSBarry Smith Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(A)->data; 111719d5645SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)(F)->spptr; 1125a46d3faSBarry Smith PetscErrorCode ierr; 1135a46d3faSBarry Smith PetscInt sinfo; 1145a46d3faSBarry Smith SuperLUStat_t stat; 1155a46d3faSBarry Smith PetscReal ferr, berr; 1165a46d3faSBarry Smith NCformat *Ustore; 1175a46d3faSBarry Smith SCformat *Lstore; 1185a46d3faSBarry Smith 1195a46d3faSBarry Smith PetscFunctionBegin; 1205a46d3faSBarry Smith if (lu->flg == SAME_NONZERO_PATTERN){ /* successing numerical factorization */ 1215a46d3faSBarry Smith lu->options.Fact = SamePattern; 1225a46d3faSBarry Smith /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */ 1235a46d3faSBarry Smith Destroy_SuperMatrix_Store(&lu->A); 1245a46d3faSBarry Smith if ( lu->lwork >= 0 ) { 1255a46d3faSBarry Smith Destroy_SuperNode_Matrix(&lu->L); 1265a46d3faSBarry Smith Destroy_CompCol_Matrix(&lu->U); 1275a46d3faSBarry Smith lu->options.Fact = SamePattern; 1285a46d3faSBarry Smith } 1295a46d3faSBarry Smith } 1305a46d3faSBarry Smith 1315a46d3faSBarry Smith /* Create the SuperMatrix for lu->A=A^T: 1325a46d3faSBarry Smith Since SuperLU likes column-oriented matrices,we pass it the transpose, 1335a46d3faSBarry Smith and then solve A^T X = B in MatSolve(). */ 1345a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX) 135d0f46423SBarry Smith zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i, 1365a46d3faSBarry Smith SLU_NC,SLU_Z,SLU_GE); 1375a46d3faSBarry Smith #else 138d0f46423SBarry Smith dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i, 1395a46d3faSBarry Smith SLU_NC,SLU_D,SLU_GE); 1405a46d3faSBarry Smith #endif 1415a46d3faSBarry Smith 1425a46d3faSBarry Smith /* Initialize the statistics variables. */ 1435a46d3faSBarry Smith StatInit(&stat); 1445a46d3faSBarry Smith 1455a46d3faSBarry Smith /* Numerical factorization */ 1465a46d3faSBarry Smith lu->B.ncol = 0; /* Indicate not to solve the system */ 147*cffbb591SHong Zhang if (F->factor == MAT_FACTOR_LU){ 1485a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX) 1495a46d3faSBarry Smith zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 1505a46d3faSBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 1515a46d3faSBarry Smith &lu->mem_usage, &stat, &sinfo); 1525a46d3faSBarry Smith #else 1535a46d3faSBarry Smith dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 1545a46d3faSBarry Smith &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 1555a46d3faSBarry Smith &lu->mem_usage, &stat, &sinfo); 1565a46d3faSBarry Smith #endif 157*cffbb591SHong Zhang } else if (F->factor == MAT_FACTOR_ILU){ 158*cffbb591SHong Zhang /* Compute the incomplete factorization, condition number and pivot growth */ 159*cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX) 160*cffbb591SHong Zhang zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C, 161*cffbb591SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 162*cffbb591SHong Zhang &lu->mem_usage, &stat, &sinfo); 163*cffbb591SHong Zhang #else 164*cffbb591SHong Zhang dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 165*cffbb591SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 166*cffbb591SHong Zhang &lu->mem_usage, &stat, &sinfo); 167*cffbb591SHong Zhang #endif 168*cffbb591SHong Zhang } else { 169*cffbb591SHong Zhang SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 170*cffbb591SHong Zhang } 1715a46d3faSBarry Smith if ( !sinfo || sinfo == lu->A.ncol+1 ) { 1725a46d3faSBarry Smith if ( lu->options.PivotGrowth ) 1735a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," Recip. pivot growth = %e\n", lu->rpg); 1745a46d3faSBarry Smith if ( lu->options.ConditionNumber ) 1755a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," Recip. condition number = %e\n", lu->rcond); 1765a46d3faSBarry Smith } else if ( sinfo > 0 ){ 1775a46d3faSBarry Smith if ( lu->lwork == -1 ) { 1785a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol); 1795a46d3faSBarry Smith } else { 1805a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," Warning: gssvx() returns info %D\n",sinfo); 1815a46d3faSBarry Smith } 1825a46d3faSBarry Smith } else { /* sinfo < 0 */ 1835a46d3faSBarry Smith SETERRQ2(PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo); 1845a46d3faSBarry Smith } 1855a46d3faSBarry Smith 1865a46d3faSBarry Smith if ( lu->options.PrintStat ) { 1875a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n"); 1885a46d3faSBarry Smith StatPrint(&stat); 1895a46d3faSBarry Smith Lstore = (SCformat *) lu->L.Store; 1905a46d3faSBarry Smith Ustore = (NCformat *) lu->U.Store; 1915a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor L = %D\n", Lstore->nnz); 1925a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in factor U = %D\n", Ustore->nnz); 1935a46d3faSBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol); 1946da386baSHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," L\\U MB %.3f\ttotal MB needed %.3f\n", 1956da386baSHong Zhang lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6); 1965a46d3faSBarry Smith } 1975a46d3faSBarry Smith StatFree(&stat); 1985a46d3faSBarry Smith 1995a46d3faSBarry Smith lu->flg = SAME_NONZERO_PATTERN; 200719d5645SBarry Smith (F)->ops->solve = MatSolve_SuperLU; 201719d5645SBarry Smith (F)->ops->solvetranspose = MatSolveTranspose_SuperLU; 2025a46d3faSBarry Smith PetscFunctionReturn(0); 2035a46d3faSBarry Smith } 2045a46d3faSBarry Smith 20514b396bbSKris Buschelman #undef __FUNCT__ 206f0c56d0fSKris Buschelman #define __FUNCT__ "MatDestroy_SuperLU" 207dfbe8321SBarry Smith PetscErrorCode MatDestroy_SuperLU(Mat A) 20814b396bbSKris Buschelman { 209dfbe8321SBarry Smith PetscErrorCode ierr; 210f0c56d0fSKris Buschelman Mat_SuperLU *lu=(Mat_SuperLU*)A->spptr; 21114b396bbSKris Buschelman 21214b396bbSKris Buschelman PetscFunctionBegin; 21375af56d4SHong Zhang if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */ 21475af56d4SHong Zhang Destroy_SuperMatrix_Store(&lu->A); 21575af56d4SHong Zhang Destroy_SuperMatrix_Store(&lu->B); 21675af56d4SHong Zhang Destroy_SuperMatrix_Store(&lu->X); 2179ce50919SHong Zhang 2189ce50919SHong Zhang ierr = PetscFree(lu->etree);CHKERRQ(ierr); 2199ce50919SHong Zhang ierr = PetscFree(lu->perm_r);CHKERRQ(ierr); 2209ce50919SHong Zhang ierr = PetscFree(lu->perm_c);CHKERRQ(ierr); 2219ce50919SHong Zhang ierr = PetscFree(lu->R);CHKERRQ(ierr); 2229ce50919SHong Zhang ierr = PetscFree(lu->C);CHKERRQ(ierr); 22375af56d4SHong Zhang if ( lu->lwork >= 0 ) { 224fb3e25aaSKris Buschelman Destroy_SuperNode_Matrix(&lu->L); 225fb3e25aaSKris Buschelman Destroy_CompCol_Matrix(&lu->U); 22675af56d4SHong Zhang } 227fb3e25aaSKris Buschelman } 228b24902e0SBarry Smith ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr); 22914b396bbSKris Buschelman PetscFunctionReturn(0); 23014b396bbSKris Buschelman } 23114b396bbSKris Buschelman 23214b396bbSKris Buschelman #undef __FUNCT__ 233f0c56d0fSKris Buschelman #define __FUNCT__ "MatView_SuperLU" 234dfbe8321SBarry Smith PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer) 23514b396bbSKris Buschelman { 236dfbe8321SBarry Smith PetscErrorCode ierr; 23732077d6dSBarry Smith PetscTruth iascii; 23814b396bbSKris Buschelman PetscViewerFormat format; 23914b396bbSKris Buschelman 24014b396bbSKris Buschelman PetscFunctionBegin; 241b24902e0SBarry Smith ierr = MatView_SeqAIJ(A,viewer);CHKERRQ(ierr); 24214b396bbSKris Buschelman 24332077d6dSBarry Smith ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr); 24432077d6dSBarry Smith if (iascii) { 24514b396bbSKris Buschelman ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 2462f59403fSHong Zhang if (format == PETSC_VIEWER_ASCII_INFO) { 247f0c56d0fSKris Buschelman ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr); 24814b396bbSKris Buschelman } 24914b396bbSKris Buschelman } 25014b396bbSKris Buschelman PetscFunctionReturn(0); 25114b396bbSKris Buschelman } 25214b396bbSKris Buschelman 25314b396bbSKris Buschelman 25414b396bbSKris Buschelman #undef __FUNCT__ 255c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU_Private" 256c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x) 25714b396bbSKris Buschelman { 258f0c56d0fSKris Buschelman Mat_SuperLU *lu = (Mat_SuperLU*)A->spptr; 25975af56d4SHong Zhang PetscScalar *barray,*xarray; 260dfbe8321SBarry Smith PetscErrorCode ierr; 261da7a1d00SHong Zhang PetscInt info,i; 26275af56d4SHong Zhang SuperLUStat_t stat; 263da7a1d00SHong Zhang PetscReal ferr,berr; 26414b396bbSKris Buschelman 26514b396bbSKris Buschelman PetscFunctionBegin; 266937a6b0eSHong Zhang if ( lu->lwork == -1 ) { 267937a6b0eSHong Zhang PetscFunctionReturn(0); 268937a6b0eSHong Zhang } 26975af56d4SHong Zhang lu->B.ncol = 1; /* Set the number of right-hand side */ 27075af56d4SHong Zhang ierr = VecGetArray(b,&barray);CHKERRQ(ierr); 27175af56d4SHong Zhang ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 2725fe6150dSHong Zhang 2735fe6150dSHong Zhang #if defined(PETSC_USE_COMPLEX) 2745fe6150dSHong Zhang ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray; 2755fe6150dSHong Zhang ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray; 2765fe6150dSHong Zhang #else 2775fe6150dSHong Zhang ((DNformat*)lu->B.Store)->nzval = barray; 27875af56d4SHong Zhang ((DNformat*)lu->X.Store)->nzval = xarray; 2795fe6150dSHong Zhang #endif 28075af56d4SHong Zhang 28175af56d4SHong Zhang /* Initialize the statistics variables. */ 28275af56d4SHong Zhang StatInit(&stat); 28375af56d4SHong Zhang 28475af56d4SHong Zhang lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ 285*cffbb591SHong Zhang if (A->factor == MAT_FACTOR_LU){ 2868914a3f7SHong Zhang #if defined(PETSC_USE_COMPLEX) 2878914a3f7SHong Zhang zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 2888914a3f7SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 2898914a3f7SHong Zhang &lu->mem_usage, &stat, &info); 2908914a3f7SHong Zhang #else 29175af56d4SHong Zhang dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 29275af56d4SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 29375af56d4SHong Zhang &lu->mem_usage, &stat, &info); 2948914a3f7SHong Zhang #endif 295*cffbb591SHong Zhang } else if (A->factor == MAT_FACTOR_ILU){ 296*cffbb591SHong Zhang #if defined(PETSC_USE_COMPLEX) 297*cffbb591SHong Zhang zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 298*cffbb591SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 299*cffbb591SHong Zhang &lu->mem_usage, &stat, &info); 300*cffbb591SHong Zhang #else 301*cffbb591SHong Zhang dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 302*cffbb591SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, 303*cffbb591SHong Zhang &lu->mem_usage, &stat, &info); 304*cffbb591SHong Zhang #endif 305*cffbb591SHong Zhang } else { 306*cffbb591SHong Zhang SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 307*cffbb591SHong Zhang } 30875af56d4SHong Zhang ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr); 30975af56d4SHong Zhang ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 31075af56d4SHong Zhang 311958c9bccSBarry Smith if ( !info || info == lu->A.ncol+1 ) { 31275af56d4SHong Zhang if ( lu->options.IterRefine ) { 3138914a3f7SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n"); 3148914a3f7SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); 31575af56d4SHong Zhang for (i = 0; i < 1; ++i) 3168914a3f7SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF," %8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr, berr); 31775af56d4SHong Zhang } 3188914a3f7SHong Zhang } else if ( info > 0 ){ 3198914a3f7SHong Zhang if ( lu->lwork == -1 ) { 32077431f27SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," ** Estimated memory: %D bytes\n", info - lu->A.ncol); 3218914a3f7SHong Zhang } else { 32277431f27SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," Warning: gssvx() returns info %D\n",info); 3238914a3f7SHong Zhang } 3248914a3f7SHong Zhang } else if (info < 0){ 32577431f27SBarry Smith SETERRQ2(PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info); 32614b396bbSKris Buschelman } 32714b396bbSKris Buschelman 3288914a3f7SHong Zhang if ( lu->options.PrintStat ) { 3298914a3f7SHong Zhang ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n"); 3308914a3f7SHong Zhang StatPrint(&stat); 3318914a3f7SHong Zhang } 33275af56d4SHong Zhang StatFree(&stat); 33375af56d4SHong Zhang PetscFunctionReturn(0); 33475af56d4SHong Zhang } 33514b396bbSKris Buschelman 33614b396bbSKris Buschelman #undef __FUNCT__ 337c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU" 338c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x) 339c29e884eSHong Zhang { 340c29e884eSHong Zhang Mat_SuperLU *lu = (Mat_SuperLU*)A->spptr; 341c29e884eSHong Zhang PetscErrorCode ierr; 342c29e884eSHong Zhang 343c29e884eSHong Zhang PetscFunctionBegin; 344c29e884eSHong Zhang lu->options.Trans = TRANS; 345c29e884eSHong Zhang ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr); 346c29e884eSHong Zhang PetscFunctionReturn(0); 347c29e884eSHong Zhang } 348c29e884eSHong Zhang 349c29e884eSHong Zhang #undef __FUNCT__ 350c7c1fe80SHong Zhang #define __FUNCT__ "MatSolveTranspose_SuperLU" 351c7c1fe80SHong Zhang PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x) 352c7c1fe80SHong Zhang { 353c7c1fe80SHong Zhang Mat_SuperLU *lu = (Mat_SuperLU*)A->spptr; 354c7c1fe80SHong Zhang PetscErrorCode ierr; 355c7c1fe80SHong Zhang 356c7c1fe80SHong Zhang PetscFunctionBegin; 357c7c1fe80SHong Zhang lu->options.Trans = NOTRANS; 358c29e884eSHong Zhang ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr); 359c7c1fe80SHong Zhang PetscFunctionReturn(0); 360c7c1fe80SHong Zhang } 361c7c1fe80SHong Zhang 36214b396bbSKris Buschelman /* 36314b396bbSKris Buschelman Note the r permutation is ignored 36414b396bbSKris Buschelman */ 36514b396bbSKris Buschelman #undef __FUNCT__ 366f0c56d0fSKris Buschelman #define __FUNCT__ "MatLUFactorSymbolic_SuperLU" 3670481f469SBarry Smith PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 36814b396bbSKris Buschelman { 369719d5645SBarry Smith Mat_SuperLU *lu = (Mat_SuperLU*)((F)->spptr); 370b24902e0SBarry Smith PetscErrorCode ierr; 371d0f46423SBarry Smith PetscInt m=A->rmap->n,n=A->cmap->n; 372b24902e0SBarry Smith 373b24902e0SBarry Smith PetscFunctionBegin; 374b24902e0SBarry Smith 375b24902e0SBarry Smith /* Allocate spaces (notice sizes are for the transpose) */ 376b24902e0SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr); 377b24902e0SBarry Smith ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr); 378b24902e0SBarry Smith ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr); 379*cffbb591SHong Zhang ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->R);CHKERRQ(ierr); 380*cffbb591SHong Zhang ierr = PetscMalloc(m*sizeof(PetscScalar),&lu->C);CHKERRQ(ierr); 381b24902e0SBarry Smith 382b24902e0SBarry Smith /* create rhs and solution x without allocate space for .Store */ 383b24902e0SBarry Smith #if defined(PETSC_USE_COMPLEX) 384b24902e0SBarry Smith zCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE); 385b24902e0SBarry Smith zCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE); 386b24902e0SBarry Smith #else 387b24902e0SBarry Smith dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE); 388b24902e0SBarry Smith dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE); 389b24902e0SBarry Smith #endif 390b24902e0SBarry Smith 391b24902e0SBarry Smith lu->flg = DIFFERENT_NONZERO_PATTERN; 392b24902e0SBarry Smith lu->CleanUpSuperLU = PETSC_TRUE; 393719d5645SBarry Smith (F)->ops->lufactornumeric = MatLUFactorNumeric_SuperLU; 394b24902e0SBarry Smith PetscFunctionReturn(0); 395b24902e0SBarry Smith } 396b24902e0SBarry Smith 39735bd34faSBarry Smith EXTERN_C_BEGIN 39835bd34faSBarry Smith #undef __FUNCT__ 39935bd34faSBarry Smith #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu" 40035bd34faSBarry Smith PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type) 40135bd34faSBarry Smith { 40235bd34faSBarry Smith PetscFunctionBegin; 40335bd34faSBarry Smith *type = MAT_SOLVER_SUPERLU; 40435bd34faSBarry Smith PetscFunctionReturn(0); 40535bd34faSBarry Smith } 40635bd34faSBarry Smith EXTERN_C_END 40735bd34faSBarry Smith 408b24902e0SBarry Smith 409b24902e0SBarry Smith /*MC 410b5e56a35SBarry Smith MAT_SOLVER_SUPERLU = "superlu" - A solver package roviding direct solvers (LU) for sequential matrices 411b24902e0SBarry Smith via the external package SuperLU. 412b24902e0SBarry Smith 4135c9eb25fSBarry Smith Use config/configure.py --download-superlu to have PETSc installed with SuperLU 414b24902e0SBarry Smith 415b24902e0SBarry Smith Options Database Keys: 4165c9eb25fSBarry Smith + -mat_superlu_ordering <0,1,2,3> - 0: natural ordering, 417b24902e0SBarry Smith 1: MMD applied to A'*A, 418b24902e0SBarry Smith 2: MMD applied to A'+A, 419b24902e0SBarry Smith 3: COLAMD, approximate minimum degree column ordering 420b24902e0SBarry Smith . -mat_superlu_iterrefine - have SuperLU do iterative refinement after the triangular solve 421b24902e0SBarry Smith choices: NOREFINE, SINGLE, DOUBLE, EXTRA; default is NOREFINE 422b24902e0SBarry Smith - -mat_superlu_printstat - print SuperLU statistics about the factorization 423b24902e0SBarry Smith 4245c9eb25fSBarry Smith Notes: Do not confuse this with MAT_SOLVER_SUPERLU_DIST which is for parallel sparse solves 4255c9eb25fSBarry Smith 426b24902e0SBarry Smith Level: beginner 427b24902e0SBarry Smith 42841c8de11SBarry Smith .seealso: PCLU, MAT_SOLVER_SUPERLU_DIST, MAT_SOLVER_MUMPS, MAT_SOLVER_SPOOLES, PCFactorSetMatSolverPackage(), MatSolverPackage 429b24902e0SBarry Smith M*/ 430b24902e0SBarry Smith 4315c9eb25fSBarry Smith EXTERN_C_BEGIN 432b24902e0SBarry Smith #undef __FUNCT__ 433b24902e0SBarry Smith #define __FUNCT__ "MatGetFactor_seqaij_superlu" 4345c9eb25fSBarry Smith PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F) 435b24902e0SBarry Smith { 43614b396bbSKris Buschelman Mat B; 437f0c56d0fSKris Buschelman Mat_SuperLU *lu; 4386849ba73SBarry Smith PetscErrorCode ierr; 439e631078cSBarry Smith PetscInt indx; 4409ce50919SHong Zhang PetscTruth flg; 4415ca28756SHong Zhang const char *colperm[]={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */ 4425ca28756SHong Zhang const char *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"}; 4435ca28756SHong Zhang const char *rowperm[]={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */ 44414b396bbSKris Buschelman 44514b396bbSKris Buschelman PetscFunctionBegin; 4467adad957SLisandro Dalcin ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr); 447d0f46423SBarry Smith ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr); 4487adad957SLisandro Dalcin ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr); 449899d7b4fSKris Buschelman ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 450f0c56d0fSKris Buschelman 451*cffbb591SHong Zhang if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU){ 452b24902e0SBarry Smith B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU; 453*cffbb591SHong Zhang B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU; 454*cffbb591SHong Zhang } else { 455*cffbb591SHong Zhang SETERRQ(PETSC_ERR_SUP,"Factor type not supported"); 456*cffbb591SHong Zhang } 457*cffbb591SHong Zhang 458b24902e0SBarry Smith B->ops->destroy = MatDestroy_SuperLU; 4593519fcdcSHong Zhang B->ops->view = MatView_SuperLU; 460*cffbb591SHong Zhang B->factor = ftype; 46194b7f48cSBarry Smith B->assembled = PETSC_TRUE; /* required by -ksp_view */ 4625c9eb25fSBarry Smith B->preallocated = PETSC_TRUE; 46314b396bbSKris Buschelman 464b24902e0SBarry Smith ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr); 465*cffbb591SHong Zhang if (ftype == MAT_FACTOR_LU){ 4669ce50919SHong Zhang set_default_options(&lu->options); 467*cffbb591SHong Zhang } else if (ftype == MAT_FACTOR_ILU){ 468*cffbb591SHong Zhang /* Set the default input options of ilu: 469*cffbb591SHong Zhang options.Fact = DOFACT; 470*cffbb591SHong Zhang options.Equil = YES; 471*cffbb591SHong Zhang options.ColPerm = COLAMD; 472*cffbb591SHong Zhang options.DiagPivotThresh = 0.1; //different from complete LU 473*cffbb591SHong Zhang options.Trans = NOTRANS; 474*cffbb591SHong Zhang options.IterRefine = NOREFINE; 475*cffbb591SHong Zhang options.SymmetricMode = NO; 476*cffbb591SHong Zhang options.PivotGrowth = NO; 477*cffbb591SHong Zhang options.ConditionNumber = NO; 478*cffbb591SHong Zhang options.PrintStat = YES; 479*cffbb591SHong Zhang options.RowPerm = LargeDiag; 480*cffbb591SHong Zhang options.ILU_DropTol = 1e-4; 481*cffbb591SHong Zhang options.ILU_FillTol = 1e-2; 482*cffbb591SHong Zhang options.ILU_FillFactor = 10.0; 483*cffbb591SHong Zhang options.ILU_DropRule = DROP_BASIC | DROP_AREA; 484*cffbb591SHong Zhang options.ILU_Norm = INF_NORM; 485*cffbb591SHong Zhang options.ILU_MILU = SMILU_2; 486*cffbb591SHong Zhang */ 487*cffbb591SHong Zhang ilu_set_default_options(&lu->options); 488*cffbb591SHong Zhang } 489*cffbb591SHong Zhang 4909ce50919SHong Zhang lu->options.PrintStat = NO; 4918914a3f7SHong Zhang lu->lwork = 0; /* allocate space internally by system malloc */ 4929ce50919SHong Zhang 4937adad957SLisandro Dalcin ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); 494*cffbb591SHong Zhang ierr = PetscOptionsTruth("-mat_superlu_equil","Equil","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 495*cffbb591SHong Zhang if (flg) lu->options.Equil = YES; 4968914a3f7SHong Zhang ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr); 4979ce50919SHong Zhang if (flg) {lu->options.ColPerm = (colperm_t)indx;} 4988914a3f7SHong Zhang ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr); 4999ce50919SHong Zhang if (flg) { lu->options.IterRefine = (IterRefine_t)indx;} 5004dc4c822SBarry Smith ierr = PetscOptionsTruth("-mat_superlu_symmetricmode","SymmetricMode","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 5019ce50919SHong Zhang if (flg) lu->options.SymmetricMode = YES; 5028914a3f7SHong Zhang ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&lu->options.DiagPivotThresh,PETSC_NULL);CHKERRQ(ierr); 5034dc4c822SBarry Smith ierr = PetscOptionsTruth("-mat_superlu_pivotgrowth","PivotGrowth","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 5049ce50919SHong Zhang if (flg) lu->options.PivotGrowth = YES; 5054dc4c822SBarry Smith ierr = PetscOptionsTruth("-mat_superlu_conditionnumber","ConditionNumber","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 5069ce50919SHong Zhang if (flg) lu->options.ConditionNumber = YES; 5078914a3f7SHong Zhang ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[0],&indx,&flg);CHKERRQ(ierr); 5089ce50919SHong Zhang if (flg) {lu->options.RowPerm = (rowperm_t)indx;} 5094dc4c822SBarry Smith ierr = PetscOptionsTruth("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 5109ce50919SHong Zhang if (flg) lu->options.ReplaceTinyPivot = YES; 5114dc4c822SBarry Smith ierr = PetscOptionsTruth("-mat_superlu_printstat","PrintStat","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr); 5129ce50919SHong Zhang if (flg) lu->options.PrintStat = YES; 5138914a3f7SHong Zhang ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,PETSC_NULL);CHKERRQ(ierr); 5145fe6150dSHong Zhang if (lu->lwork > 0 ){ 5155fe6150dSHong Zhang ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr); 5165fe6150dSHong Zhang } else if (lu->lwork != 0 && lu->lwork != -1){ 51777431f27SBarry Smith ierr = PetscPrintf(PETSC_COMM_SELF," Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork); 5188914a3f7SHong Zhang lu->lwork = 0; 5198914a3f7SHong Zhang } 520*cffbb591SHong Zhang /* ilu options */ 521*cffbb591SHong Zhang ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&lu->options.ILU_DropTol,PETSC_NULL);CHKERRQ(ierr); 522*cffbb591SHong Zhang ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&lu->options.ILU_FillTol,PETSC_NULL);CHKERRQ(ierr); 523*cffbb591SHong Zhang ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&lu->options.ILU_FillFactor,PETSC_NULL);CHKERRQ(ierr); 524*cffbb591SHong Zhang ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,PETSC_NULL);CHKERRQ(ierr); 525*cffbb591SHong Zhang ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr); 526*cffbb591SHong Zhang if (flg){ 527*cffbb591SHong Zhang lu->options.ILU_Norm = (norm_t)indx; 528*cffbb591SHong Zhang } 529*cffbb591SHong Zhang ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr); 530*cffbb591SHong Zhang if (flg){ 531*cffbb591SHong Zhang lu->options.ILU_MILU = (milu_t)indx; 532*cffbb591SHong Zhang } 5339ce50919SHong Zhang PetscOptionsEnd(); 5349ce50919SHong Zhang 53575af56d4SHong Zhang #ifdef SUPERLU2 5365c9eb25fSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void(*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr); 53775af56d4SHong Zhang #endif 53835bd34faSBarry Smith ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_superlu",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr); 5395c9eb25fSBarry Smith B->spptr = lu; 540899d7b4fSKris Buschelman *F = B; 54114b396bbSKris Buschelman PetscFunctionReturn(0); 54214b396bbSKris Buschelman } 5435c9eb25fSBarry Smith EXTERN_C_END 544