xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 3519fcdc3ccd010619df22db39613e3a1abe34d6)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
214b396bbSKris Buschelman 
35a46d3faSBarry Smith /*  --------------------------------------------------------------------
45a46d3faSBarry Smith 
55a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
65a46d3faSBarry Smith      the SuperLU 3.0 sparse solver. You can use this as a starting point for
75a46d3faSBarry Smith      implementing your own subclass of a PETSc matrix class.
85a46d3faSBarry Smith 
95a46d3faSBarry Smith      This demonstrates a way to make an implementation inheritence of a PETSc
105a46d3faSBarry Smith      matrix type. This means constructing a new matrix type (SuperLU) by changing some
115a46d3faSBarry Smith      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
125a46d3faSBarry Smith      additional method. (See any book on object oriented programming).
1314b396bbSKris Buschelman */
1414b396bbSKris Buschelman 
155a46d3faSBarry Smith /*
165a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
175a46d3faSBarry Smith */
1814b396bbSKris Buschelman #include "src/mat/impls/aij/seq/aij.h"
1914b396bbSKris Buschelman 
205a46d3faSBarry Smith /*
215a46d3faSBarry Smith      SuperLU include files
225a46d3faSBarry Smith */
2314b396bbSKris Buschelman EXTERN_C_BEGIN
2414b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
2566f57492SHong Zhang #include "slu_zdefs.h"
2614b396bbSKris Buschelman #else
2766f57492SHong Zhang #include "slu_ddefs.h"
2814b396bbSKris Buschelman #endif
2966f57492SHong Zhang #include "slu_util.h"
3014b396bbSKris Buschelman EXTERN_C_END
3114b396bbSKris Buschelman 
325a46d3faSBarry Smith /*
335a46d3faSBarry Smith      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
345a46d3faSBarry Smith */
3514b396bbSKris Buschelman typedef struct {
3675af56d4SHong Zhang   SuperMatrix       A,L,U,B,X;
3775af56d4SHong Zhang   superlu_options_t options;
38da7a1d00SHong Zhang   PetscInt          *perm_c; /* column permutation vector */
39da7a1d00SHong Zhang   PetscInt          *perm_r; /* row permutations from partial pivoting */
40da7a1d00SHong Zhang   PetscInt          *etree;
41da7a1d00SHong Zhang   PetscReal         *R, *C;
4275af56d4SHong Zhang   char              equed[1];
43da7a1d00SHong Zhang   PetscInt          lwork;
4475af56d4SHong Zhang   void              *work;
45da7a1d00SHong Zhang   PetscReal         rpg, rcond;
4675af56d4SHong Zhang   mem_usage_t       mem_usage;
4775af56d4SHong Zhang   MatStructure      flg;
4814b396bbSKris Buschelman 
4914b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
5014b396bbSKris Buschelman   PetscTruth CleanUpSuperLU;
51f0c56d0fSKris Buschelman } Mat_SuperLU;
5214b396bbSKris Buschelman 
53e0e586b9SHong Zhang extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
54e0e586b9SHong Zhang extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,MatFactorInfo *,Mat *);
55e0e586b9SHong Zhang extern PetscErrorCode MatDestroy_SuperLU(Mat);
56e0e586b9SHong Zhang extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
57e0e586b9SHong Zhang extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
58e0e586b9SHong Zhang extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
59e0e586b9SHong Zhang extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
60e0e586b9SHong Zhang extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,IS,IS,MatFactorInfo *,Mat *);
61e0e586b9SHong Zhang 
625a46d3faSBarry Smith /*
635a46d3faSBarry Smith     Utility function
645a46d3faSBarry Smith */
655a46d3faSBarry Smith #undef __FUNCT__
665a46d3faSBarry Smith #define __FUNCT__ "MatFactorInfo_SuperLU"
675a46d3faSBarry Smith PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
685a46d3faSBarry Smith {
695a46d3faSBarry Smith   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
705a46d3faSBarry Smith   PetscErrorCode    ierr;
715a46d3faSBarry Smith   superlu_options_t options;
725a46d3faSBarry Smith 
735a46d3faSBarry Smith   PetscFunctionBegin;
745a46d3faSBarry Smith   /* check if matrix is superlu_dist type */
755a46d3faSBarry Smith   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
765a46d3faSBarry Smith 
775a46d3faSBarry Smith   options = lu->options;
785a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
795a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES": "NO");CHKERRQ(ierr);
805a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
815a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
825a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES": "NO");CHKERRQ(ierr);
835a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
845a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES": "NO");CHKERRQ(ierr);
855a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES": "NO");CHKERRQ(ierr);
865a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
875a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES": "NO");CHKERRQ(ierr);
885a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES": "NO");CHKERRQ(ierr);
895a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
905a46d3faSBarry Smith 
915a46d3faSBarry Smith   PetscFunctionReturn(0);
925a46d3faSBarry Smith }
935a46d3faSBarry Smith 
945a46d3faSBarry Smith /*
955a46d3faSBarry Smith     These are the methods provided to REPLACE the corresponding methods of the
965a46d3faSBarry Smith    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
975a46d3faSBarry Smith */
985a46d3faSBarry Smith #undef __FUNCT__
995a46d3faSBarry Smith #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
1005a46d3faSBarry Smith PetscErrorCode MatLUFactorNumeric_SuperLU(Mat A,MatFactorInfo *info,Mat *F)
1015a46d3faSBarry Smith {
1025a46d3faSBarry Smith   Mat_SeqAIJ     *aa = (Mat_SeqAIJ*)(A)->data;
1035a46d3faSBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)(*F)->spptr;
1045a46d3faSBarry Smith   PetscErrorCode ierr;
1055a46d3faSBarry Smith   PetscInt       sinfo;
1065a46d3faSBarry Smith   SuperLUStat_t  stat;
1075a46d3faSBarry Smith   PetscReal      ferr, berr;
1085a46d3faSBarry Smith   NCformat       *Ustore;
1095a46d3faSBarry Smith   SCformat       *Lstore;
1105a46d3faSBarry Smith 
1115a46d3faSBarry Smith   PetscFunctionBegin;
1125a46d3faSBarry Smith   if (lu->flg == SAME_NONZERO_PATTERN){ /* successing numerical factorization */
1135a46d3faSBarry Smith     lu->options.Fact = SamePattern;
1145a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
1155a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
1165a46d3faSBarry Smith     if ( lu->lwork >= 0 ) {
1175a46d3faSBarry Smith       Destroy_SuperNode_Matrix(&lu->L);
1185a46d3faSBarry Smith       Destroy_CompCol_Matrix(&lu->U);
1195a46d3faSBarry Smith       lu->options.Fact = SamePattern;
1205a46d3faSBarry Smith     }
1215a46d3faSBarry Smith   }
1225a46d3faSBarry Smith 
1235a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
1245a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
1255a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
1265a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
127d0f46423SBarry Smith   zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,
1285a46d3faSBarry Smith                            SLU_NC,SLU_Z,SLU_GE);
1295a46d3faSBarry Smith #else
130d0f46423SBarry Smith   dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,
1315a46d3faSBarry Smith                            SLU_NC,SLU_D,SLU_GE);
1325a46d3faSBarry Smith #endif
1335a46d3faSBarry Smith 
1345a46d3faSBarry Smith   /* Initialize the statistics variables. */
1355a46d3faSBarry Smith   StatInit(&stat);
1365a46d3faSBarry Smith 
1375a46d3faSBarry Smith   /* Numerical factorization */
1385a46d3faSBarry Smith   lu->B.ncol = 0;  /* Indicate not to solve the system */
1395a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
1405a46d3faSBarry Smith    zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1415a46d3faSBarry Smith            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
1425a46d3faSBarry Smith            &lu->mem_usage, &stat, &sinfo);
1435a46d3faSBarry Smith #else
1445a46d3faSBarry Smith   dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1455a46d3faSBarry Smith            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
1465a46d3faSBarry Smith            &lu->mem_usage, &stat, &sinfo);
1475a46d3faSBarry Smith #endif
1485a46d3faSBarry Smith   if ( !sinfo || sinfo == lu->A.ncol+1 ) {
1495a46d3faSBarry Smith     if ( lu->options.PivotGrowth )
1505a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
1515a46d3faSBarry Smith     if ( lu->options.ConditionNumber )
1525a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
1535a46d3faSBarry Smith   } else if ( sinfo > 0 ){
1545a46d3faSBarry Smith     if ( lu->lwork == -1 ) {
1555a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
1565a46d3faSBarry Smith     } else {
1575a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",sinfo);
1585a46d3faSBarry Smith     }
1595a46d3faSBarry Smith   } else { /* sinfo < 0 */
1605a46d3faSBarry Smith     SETERRQ2(PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
1615a46d3faSBarry Smith   }
1625a46d3faSBarry Smith 
1635a46d3faSBarry Smith   if ( lu->options.PrintStat ) {
1645a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
1655a46d3faSBarry Smith     StatPrint(&stat);
1665a46d3faSBarry Smith     Lstore = (SCformat *) lu->L.Store;
1675a46d3faSBarry Smith     Ustore = (NCformat *) lu->U.Store;
1685a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
1695a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
1705a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
1715a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\texpansions %D\n",
1725a46d3faSBarry Smith 	       lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6,
1735a46d3faSBarry Smith 	       lu->mem_usage.expansions);
1745a46d3faSBarry Smith   }
1755a46d3faSBarry Smith   StatFree(&stat);
1765a46d3faSBarry Smith 
1775a46d3faSBarry Smith   lu->flg = SAME_NONZERO_PATTERN;
1785a46d3faSBarry Smith   PetscFunctionReturn(0);
1795a46d3faSBarry Smith }
1805a46d3faSBarry Smith 
18114b396bbSKris Buschelman #undef __FUNCT__
182f0c56d0fSKris Buschelman #define __FUNCT__ "MatDestroy_SuperLU"
183dfbe8321SBarry Smith PetscErrorCode MatDestroy_SuperLU(Mat A)
18414b396bbSKris Buschelman {
185dfbe8321SBarry Smith   PetscErrorCode ierr;
186f0c56d0fSKris Buschelman   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
18714b396bbSKris Buschelman 
18814b396bbSKris Buschelman   PetscFunctionBegin;
18975af56d4SHong Zhang   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
19075af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->A);
19175af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->B);
19275af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->X);
1939ce50919SHong Zhang 
1949ce50919SHong Zhang     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
1959ce50919SHong Zhang     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
1969ce50919SHong Zhang     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
1979ce50919SHong Zhang     ierr = PetscFree(lu->R);CHKERRQ(ierr);
1989ce50919SHong Zhang     ierr = PetscFree(lu->C);CHKERRQ(ierr);
19975af56d4SHong Zhang     if ( lu->lwork >= 0 ) {
200fb3e25aaSKris Buschelman       Destroy_SuperNode_Matrix(&lu->L);
201fb3e25aaSKris Buschelman       Destroy_CompCol_Matrix(&lu->U);
20275af56d4SHong Zhang     }
203fb3e25aaSKris Buschelman   }
204b24902e0SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
20514b396bbSKris Buschelman   PetscFunctionReturn(0);
20614b396bbSKris Buschelman }
20714b396bbSKris Buschelman 
20814b396bbSKris Buschelman #undef __FUNCT__
209f0c56d0fSKris Buschelman #define __FUNCT__ "MatView_SuperLU"
210dfbe8321SBarry Smith PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
21114b396bbSKris Buschelman {
212dfbe8321SBarry Smith   PetscErrorCode    ierr;
21332077d6dSBarry Smith   PetscTruth        iascii;
21414b396bbSKris Buschelman   PetscViewerFormat format;
21514b396bbSKris Buschelman 
21614b396bbSKris Buschelman   PetscFunctionBegin;
217b24902e0SBarry Smith   ierr = MatView_SeqAIJ(A,viewer);CHKERRQ(ierr);
21814b396bbSKris Buschelman 
21932077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
22032077d6dSBarry Smith   if (iascii) {
22114b396bbSKris Buschelman     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
2222f59403fSHong Zhang     if (format == PETSC_VIEWER_ASCII_INFO) {
223f0c56d0fSKris Buschelman       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
22414b396bbSKris Buschelman     }
22514b396bbSKris Buschelman   }
22614b396bbSKris Buschelman   PetscFunctionReturn(0);
22714b396bbSKris Buschelman }
22814b396bbSKris Buschelman 
22914b396bbSKris Buschelman 
23014b396bbSKris Buschelman #undef __FUNCT__
231c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU_Private"
232c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
23314b396bbSKris Buschelman {
234f0c56d0fSKris Buschelman   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
23575af56d4SHong Zhang   PetscScalar    *barray,*xarray;
236dfbe8321SBarry Smith   PetscErrorCode ierr;
237da7a1d00SHong Zhang   PetscInt       info,i;
23875af56d4SHong Zhang   SuperLUStat_t  stat;
239da7a1d00SHong Zhang   PetscReal      ferr,berr;
24014b396bbSKris Buschelman 
24114b396bbSKris Buschelman   PetscFunctionBegin;
242937a6b0eSHong Zhang   if ( lu->lwork == -1 ) {
243937a6b0eSHong Zhang     PetscFunctionReturn(0);
244937a6b0eSHong Zhang   }
24575af56d4SHong Zhang   lu->B.ncol = 1;   /* Set the number of right-hand side */
24675af56d4SHong Zhang   ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
24775af56d4SHong Zhang   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
2485fe6150dSHong Zhang 
2495fe6150dSHong Zhang #if defined(PETSC_USE_COMPLEX)
2505fe6150dSHong Zhang   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
2515fe6150dSHong Zhang   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
2525fe6150dSHong Zhang #else
2535fe6150dSHong Zhang   ((DNformat*)lu->B.Store)->nzval = barray;
25475af56d4SHong Zhang   ((DNformat*)lu->X.Store)->nzval = xarray;
2555fe6150dSHong Zhang #endif
25675af56d4SHong Zhang 
25775af56d4SHong Zhang   /* Initialize the statistics variables. */
25875af56d4SHong Zhang   StatInit(&stat);
25975af56d4SHong Zhang 
26075af56d4SHong Zhang   lu->options.Fact  = FACTORED; /* Indicate the factored form of A is supplied. */
2618914a3f7SHong Zhang #if defined(PETSC_USE_COMPLEX)
2628914a3f7SHong Zhang   zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
2638914a3f7SHong Zhang            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
2648914a3f7SHong Zhang            &lu->mem_usage, &stat, &info);
2658914a3f7SHong Zhang #else
26675af56d4SHong Zhang   dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
26775af56d4SHong Zhang            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
26875af56d4SHong Zhang            &lu->mem_usage, &stat, &info);
2698914a3f7SHong Zhang #endif
27075af56d4SHong Zhang   ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
27175af56d4SHong Zhang   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
27275af56d4SHong Zhang 
273958c9bccSBarry Smith   if ( !info || info == lu->A.ncol+1 ) {
27475af56d4SHong Zhang     if ( lu->options.IterRefine ) {
2758914a3f7SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
2768914a3f7SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
27775af56d4SHong Zhang       for (i = 0; i < 1; ++i)
2788914a3f7SHong Zhang         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr, berr);
27975af56d4SHong Zhang     }
2808914a3f7SHong Zhang   } else if ( info > 0 ){
2818914a3f7SHong Zhang     if ( lu->lwork == -1 ) {
28277431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
2838914a3f7SHong Zhang     } else {
28477431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
2858914a3f7SHong Zhang     }
2868914a3f7SHong Zhang   } else if (info < 0){
28777431f27SBarry Smith     SETERRQ2(PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
28814b396bbSKris Buschelman   }
28914b396bbSKris Buschelman 
2908914a3f7SHong Zhang   if ( lu->options.PrintStat ) {
2918914a3f7SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
2928914a3f7SHong Zhang     StatPrint(&stat);
2938914a3f7SHong Zhang   }
29475af56d4SHong Zhang   StatFree(&stat);
29575af56d4SHong Zhang   PetscFunctionReturn(0);
29675af56d4SHong Zhang }
29714b396bbSKris Buschelman 
29814b396bbSKris Buschelman #undef __FUNCT__
299c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU"
300c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
301c29e884eSHong Zhang {
302c29e884eSHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
303c29e884eSHong Zhang   PetscErrorCode ierr;
304c29e884eSHong Zhang 
305c29e884eSHong Zhang   PetscFunctionBegin;
306c29e884eSHong Zhang   lu->options.Trans = TRANS;
307c29e884eSHong Zhang   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
308c29e884eSHong Zhang   PetscFunctionReturn(0);
309c29e884eSHong Zhang }
310c29e884eSHong Zhang 
311c29e884eSHong Zhang #undef __FUNCT__
312c7c1fe80SHong Zhang #define __FUNCT__ "MatSolveTranspose_SuperLU"
313c7c1fe80SHong Zhang PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
314c7c1fe80SHong Zhang {
315c7c1fe80SHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
316c7c1fe80SHong Zhang   PetscErrorCode ierr;
317c7c1fe80SHong Zhang 
318c7c1fe80SHong Zhang   PetscFunctionBegin;
319c7c1fe80SHong Zhang   lu->options.Trans = NOTRANS;
320c29e884eSHong Zhang   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
321c7c1fe80SHong Zhang   PetscFunctionReturn(0);
322c7c1fe80SHong Zhang }
323c7c1fe80SHong Zhang 
32414b396bbSKris Buschelman 
32514b396bbSKris Buschelman /*
32614b396bbSKris Buschelman    Note the r permutation is ignored
32714b396bbSKris Buschelman */
32814b396bbSKris Buschelman #undef __FUNCT__
329f0c56d0fSKris Buschelman #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
330dfbe8321SBarry Smith PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
33114b396bbSKris Buschelman {
332b24902e0SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)((*F)->spptr);
333b24902e0SBarry Smith   PetscErrorCode ierr;
334d0f46423SBarry Smith   PetscInt       m=A->rmap->n,n=A->cmap->n;
335b24902e0SBarry Smith 
336b24902e0SBarry Smith   PetscFunctionBegin;
337b24902e0SBarry Smith 
338b24902e0SBarry Smith   /* Allocate spaces (notice sizes are for the transpose) */
339b24902e0SBarry Smith   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr);
340b24902e0SBarry Smith   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr);
341b24902e0SBarry Smith   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr);
342b24902e0SBarry Smith   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->R);CHKERRQ(ierr);
343b24902e0SBarry Smith   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->C);CHKERRQ(ierr);
344b24902e0SBarry Smith 
345b24902e0SBarry Smith   /* create rhs and solution x without allocate space for .Store */
346b24902e0SBarry Smith #if defined(PETSC_USE_COMPLEX)
347b24902e0SBarry Smith   zCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
348b24902e0SBarry Smith   zCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
349b24902e0SBarry Smith #else
350b24902e0SBarry Smith   dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
351b24902e0SBarry Smith   dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
352b24902e0SBarry Smith #endif
353b24902e0SBarry Smith 
354b24902e0SBarry Smith   lu->flg            = DIFFERENT_NONZERO_PATTERN;
355b24902e0SBarry Smith   lu->CleanUpSuperLU = PETSC_TRUE;
356db4efbfdSBarry Smith   (*F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU;
357db4efbfdSBarry Smith   (*F)->ops->solve            = MatSolve_SuperLU;
358db4efbfdSBarry Smith   (*F)->ops->solvetranspose   = MatSolveTranspose_SuperLU;
359b24902e0SBarry Smith   PetscFunctionReturn(0);
360b24902e0SBarry Smith }
361b24902e0SBarry Smith 
362b24902e0SBarry Smith 
363b24902e0SBarry Smith /*MC
3645c9eb25fSBarry Smith   MAT_SOLVER_SUPERLU = "superlu" - A matrix type providing direct solvers (LU) for sequential matrices
365b24902e0SBarry Smith   via the external package SuperLU.
366b24902e0SBarry Smith 
3675c9eb25fSBarry Smith   Use config/configure.py --download-superlu to have PETSc installed with SuperLU
368b24902e0SBarry Smith 
369b24902e0SBarry Smith   Options Database Keys:
3705c9eb25fSBarry Smith + -mat_superlu_ordering <0,1,2,3> - 0: natural ordering,
371b24902e0SBarry Smith                                     1: MMD applied to A'*A,
372b24902e0SBarry Smith                                     2: MMD applied to A'+A,
373b24902e0SBarry Smith                                     3: COLAMD, approximate minimum degree column ordering
374b24902e0SBarry Smith . -mat_superlu_iterrefine - have SuperLU do iterative refinement after the triangular solve
375b24902e0SBarry Smith                           choices: NOREFINE, SINGLE, DOUBLE, EXTRA; default is NOREFINE
376b24902e0SBarry Smith - -mat_superlu_printstat - print SuperLU statistics about the factorization
377b24902e0SBarry Smith 
3785c9eb25fSBarry Smith    Notes: Do not confuse this with MAT_SOLVER_SUPERLU_DIST which is for parallel sparse solves
3795c9eb25fSBarry Smith 
380b24902e0SBarry Smith    Level: beginner
381b24902e0SBarry Smith 
3825c9eb25fSBarry Smith .seealso: PCLU, MAT_SOLVER_SUPERLU_DIST, MAT_SOLVER_MUMPS, MAT_SOLVER_SPOOLES
383b24902e0SBarry Smith M*/
384b24902e0SBarry Smith 
3855c9eb25fSBarry Smith EXTERN_C_BEGIN
386b24902e0SBarry Smith #undef __FUNCT__
387b24902e0SBarry Smith #define __FUNCT__ "MatGetFactor_seqaij_superlu"
3885c9eb25fSBarry Smith PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
389b24902e0SBarry Smith {
39014b396bbSKris Buschelman   Mat            B;
391f0c56d0fSKris Buschelman   Mat_SuperLU    *lu;
3926849ba73SBarry Smith   PetscErrorCode ierr;
393e631078cSBarry Smith   PetscInt       indx;
3949ce50919SHong Zhang   PetscTruth     flg;
3955ca28756SHong Zhang   const char     *colperm[]={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
3965ca28756SHong Zhang   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
3975ca28756SHong Zhang   const char     *rowperm[]={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
39814b396bbSKris Buschelman 
39914b396bbSKris Buschelman   PetscFunctionBegin;
4007adad957SLisandro Dalcin   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
401d0f46423SBarry Smith   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4027adad957SLisandro Dalcin   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
403899d7b4fSKris Buschelman   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
404f0c56d0fSKris Buschelman 
405b24902e0SBarry Smith   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU;
406b24902e0SBarry Smith   B->ops->destroy          = MatDestroy_SuperLU;
407*3519fcdcSHong Zhang   B->ops->view             = MatView_SuperLU;
4085c9eb25fSBarry Smith   B->factor                = MAT_FACTOR_LU;
40994b7f48cSBarry Smith   B->assembled             = PETSC_TRUE;  /* required by -ksp_view */
4105c9eb25fSBarry Smith   B->preallocated          = PETSC_TRUE;
41114b396bbSKris Buschelman 
412b24902e0SBarry Smith   ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr);
4139ce50919SHong Zhang   set_default_options(&lu->options);
4148914a3f7SHong Zhang   /* equilibration causes error in solve(), thus not supported here. See dgssvx.c for possible reason. */
4158914a3f7SHong Zhang   lu->options.Equil = NO;
4169ce50919SHong Zhang   lu->options.PrintStat = NO;
4178914a3f7SHong Zhang   lu->lwork = 0;   /* allocate space internally by system malloc */
4189ce50919SHong Zhang 
4197adad957SLisandro Dalcin   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
4208914a3f7SHong Zhang     ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
4219ce50919SHong Zhang     if (flg) {lu->options.ColPerm = (colperm_t)indx;}
4228914a3f7SHong Zhang     ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
4239ce50919SHong Zhang     if (flg) { lu->options.IterRefine = (IterRefine_t)indx;}
4244dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_symmetricmode","SymmetricMode","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4259ce50919SHong Zhang     if (flg) lu->options.SymmetricMode = YES;
4268914a3f7SHong Zhang     ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&lu->options.DiagPivotThresh,PETSC_NULL);CHKERRQ(ierr);
4274dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_pivotgrowth","PivotGrowth","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4289ce50919SHong Zhang     if (flg) lu->options.PivotGrowth = YES;
4294dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_conditionnumber","ConditionNumber","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4309ce50919SHong Zhang     if (flg) lu->options.ConditionNumber = YES;
4318914a3f7SHong Zhang     ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[0],&indx,&flg);CHKERRQ(ierr);
4329ce50919SHong Zhang     if (flg) {lu->options.RowPerm = (rowperm_t)indx;}
4334dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4349ce50919SHong Zhang     if (flg) lu->options.ReplaceTinyPivot = YES;
4354dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_printstat","PrintStat","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4369ce50919SHong Zhang     if (flg) lu->options.PrintStat = YES;
4378914a3f7SHong Zhang     ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,PETSC_NULL);CHKERRQ(ierr);
4385fe6150dSHong Zhang     if (lu->lwork > 0 ){
4395fe6150dSHong Zhang       ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
4405fe6150dSHong Zhang     } else if (lu->lwork != 0 && lu->lwork != -1){
44177431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
4428914a3f7SHong Zhang       lu->lwork = 0;
4438914a3f7SHong Zhang     }
4449ce50919SHong Zhang   PetscOptionsEnd();
4459ce50919SHong Zhang 
44675af56d4SHong Zhang #ifdef SUPERLU2
4475c9eb25fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void(*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr);
44875af56d4SHong Zhang #endif
4495c9eb25fSBarry Smith   B->spptr = lu;
450899d7b4fSKris Buschelman   *F = B;
45114b396bbSKris Buschelman   PetscFunctionReturn(0);
45214b396bbSKris Buschelman }
4535c9eb25fSBarry Smith EXTERN_C_END
454