xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 7c4f633dc6bb6149cca88d301ead35a99e103cbb)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
214b396bbSKris Buschelman 
35a46d3faSBarry Smith /*  --------------------------------------------------------------------
45a46d3faSBarry Smith 
55a46d3faSBarry Smith      This file implements a subclass of the SeqAIJ matrix class that uses
65a46d3faSBarry Smith      the SuperLU 3.0 sparse solver. You can use this as a starting point for
75a46d3faSBarry Smith      implementing your own subclass of a PETSc matrix class.
85a46d3faSBarry Smith 
95a46d3faSBarry Smith      This demonstrates a way to make an implementation inheritence of a PETSc
105a46d3faSBarry Smith      matrix type. This means constructing a new matrix type (SuperLU) by changing some
115a46d3faSBarry Smith      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
125a46d3faSBarry Smith      additional method. (See any book on object oriented programming).
1314b396bbSKris Buschelman */
1414b396bbSKris Buschelman 
155a46d3faSBarry Smith /*
165a46d3faSBarry Smith      Defines the data structure for the base matrix type (SeqAIJ)
175a46d3faSBarry Smith */
18*7c4f633dSBarry Smith #include "../src/mat/impls/aij/seq/aij.h"
1914b396bbSKris Buschelman 
205a46d3faSBarry Smith /*
215a46d3faSBarry Smith      SuperLU include files
225a46d3faSBarry Smith */
2314b396bbSKris Buschelman EXTERN_C_BEGIN
2414b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
2566f57492SHong Zhang #include "slu_zdefs.h"
2614b396bbSKris Buschelman #else
2766f57492SHong Zhang #include "slu_ddefs.h"
2814b396bbSKris Buschelman #endif
2966f57492SHong Zhang #include "slu_util.h"
3014b396bbSKris Buschelman EXTERN_C_END
3114b396bbSKris Buschelman 
325a46d3faSBarry Smith /*
335a46d3faSBarry Smith      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
345a46d3faSBarry Smith */
3514b396bbSKris Buschelman typedef struct {
3675af56d4SHong Zhang   SuperMatrix       A,L,U,B,X;
3775af56d4SHong Zhang   superlu_options_t options;
38da7a1d00SHong Zhang   PetscInt          *perm_c; /* column permutation vector */
39da7a1d00SHong Zhang   PetscInt          *perm_r; /* row permutations from partial pivoting */
40da7a1d00SHong Zhang   PetscInt          *etree;
41da7a1d00SHong Zhang   PetscReal         *R, *C;
4275af56d4SHong Zhang   char              equed[1];
43da7a1d00SHong Zhang   PetscInt          lwork;
4475af56d4SHong Zhang   void              *work;
45da7a1d00SHong Zhang   PetscReal         rpg, rcond;
4675af56d4SHong Zhang   mem_usage_t       mem_usage;
4775af56d4SHong Zhang   MatStructure      flg;
4814b396bbSKris Buschelman 
4914b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
5014b396bbSKris Buschelman   PetscTruth CleanUpSuperLU;
51f0c56d0fSKris Buschelman } Mat_SuperLU;
5214b396bbSKris Buschelman 
53e0e586b9SHong Zhang extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
540481f469SBarry Smith extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo *);
55e0e586b9SHong Zhang extern PetscErrorCode MatDestroy_SuperLU(Mat);
56e0e586b9SHong Zhang extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
57e0e586b9SHong Zhang extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
58e0e586b9SHong Zhang extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
59e0e586b9SHong Zhang extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
600481f469SBarry Smith extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
61e0e586b9SHong Zhang extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat *);
62e0e586b9SHong Zhang 
635a46d3faSBarry Smith /*
645a46d3faSBarry Smith     Utility function
655a46d3faSBarry Smith */
665a46d3faSBarry Smith #undef __FUNCT__
675a46d3faSBarry Smith #define __FUNCT__ "MatFactorInfo_SuperLU"
685a46d3faSBarry Smith PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
695a46d3faSBarry Smith {
705a46d3faSBarry Smith   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
715a46d3faSBarry Smith   PetscErrorCode    ierr;
725a46d3faSBarry Smith   superlu_options_t options;
735a46d3faSBarry Smith 
745a46d3faSBarry Smith   PetscFunctionBegin;
755a46d3faSBarry Smith   /* check if matrix is superlu_dist type */
765a46d3faSBarry Smith   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
775a46d3faSBarry Smith 
785a46d3faSBarry Smith   options = lu->options;
795a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
805a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES": "NO");CHKERRQ(ierr);
815a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
825a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
835a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES": "NO");CHKERRQ(ierr);
845a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
855a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES": "NO");CHKERRQ(ierr);
865a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES": "NO");CHKERRQ(ierr);
875a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
885a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES": "NO");CHKERRQ(ierr);
895a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES": "NO");CHKERRQ(ierr);
905a46d3faSBarry Smith   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
915a46d3faSBarry Smith 
925a46d3faSBarry Smith   PetscFunctionReturn(0);
935a46d3faSBarry Smith }
945a46d3faSBarry Smith 
955a46d3faSBarry Smith /*
965a46d3faSBarry Smith     These are the methods provided to REPLACE the corresponding methods of the
975a46d3faSBarry Smith    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
985a46d3faSBarry Smith */
995a46d3faSBarry Smith #undef __FUNCT__
1005a46d3faSBarry Smith #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
1010481f469SBarry Smith PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
1025a46d3faSBarry Smith {
1035a46d3faSBarry Smith   Mat_SeqAIJ     *aa = (Mat_SeqAIJ*)(A)->data;
104719d5645SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)(F)->spptr;
1055a46d3faSBarry Smith   PetscErrorCode ierr;
1065a46d3faSBarry Smith   PetscInt       sinfo;
1075a46d3faSBarry Smith   SuperLUStat_t  stat;
1085a46d3faSBarry Smith   PetscReal      ferr, berr;
1095a46d3faSBarry Smith   NCformat       *Ustore;
1105a46d3faSBarry Smith   SCformat       *Lstore;
1115a46d3faSBarry Smith 
1125a46d3faSBarry Smith   PetscFunctionBegin;
1135a46d3faSBarry Smith   if (lu->flg == SAME_NONZERO_PATTERN){ /* successing numerical factorization */
1145a46d3faSBarry Smith     lu->options.Fact = SamePattern;
1155a46d3faSBarry Smith     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
1165a46d3faSBarry Smith     Destroy_SuperMatrix_Store(&lu->A);
1175a46d3faSBarry Smith     if ( lu->lwork >= 0 ) {
1185a46d3faSBarry Smith       Destroy_SuperNode_Matrix(&lu->L);
1195a46d3faSBarry Smith       Destroy_CompCol_Matrix(&lu->U);
1205a46d3faSBarry Smith       lu->options.Fact = SamePattern;
1215a46d3faSBarry Smith     }
1225a46d3faSBarry Smith   }
1235a46d3faSBarry Smith 
1245a46d3faSBarry Smith   /* Create the SuperMatrix for lu->A=A^T:
1255a46d3faSBarry Smith        Since SuperLU likes column-oriented matrices,we pass it the transpose,
1265a46d3faSBarry Smith        and then solve A^T X = B in MatSolve(). */
1275a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
128d0f46423SBarry Smith   zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,
1295a46d3faSBarry Smith                            SLU_NC,SLU_Z,SLU_GE);
1305a46d3faSBarry Smith #else
131d0f46423SBarry Smith   dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,
1325a46d3faSBarry Smith                            SLU_NC,SLU_D,SLU_GE);
1335a46d3faSBarry Smith #endif
1345a46d3faSBarry Smith 
1355a46d3faSBarry Smith   /* Initialize the statistics variables. */
1365a46d3faSBarry Smith   StatInit(&stat);
1375a46d3faSBarry Smith 
1385a46d3faSBarry Smith   /* Numerical factorization */
1395a46d3faSBarry Smith   lu->B.ncol = 0;  /* Indicate not to solve the system */
1405a46d3faSBarry Smith #if defined(PETSC_USE_COMPLEX)
1415a46d3faSBarry Smith    zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1425a46d3faSBarry Smith            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
1435a46d3faSBarry Smith            &lu->mem_usage, &stat, &sinfo);
1445a46d3faSBarry Smith #else
1455a46d3faSBarry Smith   dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
1465a46d3faSBarry Smith            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
1475a46d3faSBarry Smith            &lu->mem_usage, &stat, &sinfo);
1485a46d3faSBarry Smith #endif
1495a46d3faSBarry Smith   if ( !sinfo || sinfo == lu->A.ncol+1 ) {
1505a46d3faSBarry Smith     if ( lu->options.PivotGrowth )
1515a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
1525a46d3faSBarry Smith     if ( lu->options.ConditionNumber )
1535a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
1545a46d3faSBarry Smith   } else if ( sinfo > 0 ){
1555a46d3faSBarry Smith     if ( lu->lwork == -1 ) {
1565a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
1575a46d3faSBarry Smith     } else {
1585a46d3faSBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",sinfo);
1595a46d3faSBarry Smith     }
1605a46d3faSBarry Smith   } else { /* sinfo < 0 */
1615a46d3faSBarry Smith     SETERRQ2(PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
1625a46d3faSBarry Smith   }
1635a46d3faSBarry Smith 
1645a46d3faSBarry Smith   if ( lu->options.PrintStat ) {
1655a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
1665a46d3faSBarry Smith     StatPrint(&stat);
1675a46d3faSBarry Smith     Lstore = (SCformat *) lu->L.Store;
1685a46d3faSBarry Smith     Ustore = (NCformat *) lu->U.Store;
1695a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
1705a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
1715a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
1725a46d3faSBarry Smith     ierr = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\texpansions %D\n",
1735a46d3faSBarry Smith 	       lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6,
1745a46d3faSBarry Smith 	       lu->mem_usage.expansions);
1755a46d3faSBarry Smith   }
1765a46d3faSBarry Smith   StatFree(&stat);
1775a46d3faSBarry Smith 
1785a46d3faSBarry Smith   lu->flg = SAME_NONZERO_PATTERN;
179719d5645SBarry Smith   (F)->ops->solve            = MatSolve_SuperLU;
180719d5645SBarry Smith   (F)->ops->solvetranspose   = MatSolveTranspose_SuperLU;
1815a46d3faSBarry Smith   PetscFunctionReturn(0);
1825a46d3faSBarry Smith }
1835a46d3faSBarry Smith 
18414b396bbSKris Buschelman #undef __FUNCT__
185f0c56d0fSKris Buschelman #define __FUNCT__ "MatDestroy_SuperLU"
186dfbe8321SBarry Smith PetscErrorCode MatDestroy_SuperLU(Mat A)
18714b396bbSKris Buschelman {
188dfbe8321SBarry Smith   PetscErrorCode ierr;
189f0c56d0fSKris Buschelman   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
19014b396bbSKris Buschelman 
19114b396bbSKris Buschelman   PetscFunctionBegin;
19275af56d4SHong Zhang   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
19375af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->A);
19475af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->B);
19575af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->X);
1969ce50919SHong Zhang 
1979ce50919SHong Zhang     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
1989ce50919SHong Zhang     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
1999ce50919SHong Zhang     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
2009ce50919SHong Zhang     ierr = PetscFree(lu->R);CHKERRQ(ierr);
2019ce50919SHong Zhang     ierr = PetscFree(lu->C);CHKERRQ(ierr);
20275af56d4SHong Zhang     if ( lu->lwork >= 0 ) {
203fb3e25aaSKris Buschelman       Destroy_SuperNode_Matrix(&lu->L);
204fb3e25aaSKris Buschelman       Destroy_CompCol_Matrix(&lu->U);
20575af56d4SHong Zhang     }
206fb3e25aaSKris Buschelman   }
207b24902e0SBarry Smith   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
20814b396bbSKris Buschelman   PetscFunctionReturn(0);
20914b396bbSKris Buschelman }
21014b396bbSKris Buschelman 
21114b396bbSKris Buschelman #undef __FUNCT__
212f0c56d0fSKris Buschelman #define __FUNCT__ "MatView_SuperLU"
213dfbe8321SBarry Smith PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
21414b396bbSKris Buschelman {
215dfbe8321SBarry Smith   PetscErrorCode    ierr;
21632077d6dSBarry Smith   PetscTruth        iascii;
21714b396bbSKris Buschelman   PetscViewerFormat format;
21814b396bbSKris Buschelman 
21914b396bbSKris Buschelman   PetscFunctionBegin;
220b24902e0SBarry Smith   ierr = MatView_SeqAIJ(A,viewer);CHKERRQ(ierr);
22114b396bbSKris Buschelman 
22232077d6dSBarry Smith   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
22332077d6dSBarry Smith   if (iascii) {
22414b396bbSKris Buschelman     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
2252f59403fSHong Zhang     if (format == PETSC_VIEWER_ASCII_INFO) {
226f0c56d0fSKris Buschelman       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
22714b396bbSKris Buschelman     }
22814b396bbSKris Buschelman   }
22914b396bbSKris Buschelman   PetscFunctionReturn(0);
23014b396bbSKris Buschelman }
23114b396bbSKris Buschelman 
23214b396bbSKris Buschelman 
23314b396bbSKris Buschelman #undef __FUNCT__
234c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU_Private"
235c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
23614b396bbSKris Buschelman {
237f0c56d0fSKris Buschelman   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
23875af56d4SHong Zhang   PetscScalar    *barray,*xarray;
239dfbe8321SBarry Smith   PetscErrorCode ierr;
240da7a1d00SHong Zhang   PetscInt       info,i;
24175af56d4SHong Zhang   SuperLUStat_t  stat;
242da7a1d00SHong Zhang   PetscReal      ferr,berr;
24314b396bbSKris Buschelman 
24414b396bbSKris Buschelman   PetscFunctionBegin;
245937a6b0eSHong Zhang   if ( lu->lwork == -1 ) {
246937a6b0eSHong Zhang     PetscFunctionReturn(0);
247937a6b0eSHong Zhang   }
24875af56d4SHong Zhang   lu->B.ncol = 1;   /* Set the number of right-hand side */
24975af56d4SHong Zhang   ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
25075af56d4SHong Zhang   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
2515fe6150dSHong Zhang 
2525fe6150dSHong Zhang #if defined(PETSC_USE_COMPLEX)
2535fe6150dSHong Zhang   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
2545fe6150dSHong Zhang   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
2555fe6150dSHong Zhang #else
2565fe6150dSHong Zhang   ((DNformat*)lu->B.Store)->nzval = barray;
25775af56d4SHong Zhang   ((DNformat*)lu->X.Store)->nzval = xarray;
2585fe6150dSHong Zhang #endif
25975af56d4SHong Zhang 
26075af56d4SHong Zhang   /* Initialize the statistics variables. */
26175af56d4SHong Zhang   StatInit(&stat);
26275af56d4SHong Zhang 
26375af56d4SHong Zhang   lu->options.Fact  = FACTORED; /* Indicate the factored form of A is supplied. */
2648914a3f7SHong Zhang #if defined(PETSC_USE_COMPLEX)
2658914a3f7SHong Zhang   zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
2668914a3f7SHong Zhang            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
2678914a3f7SHong Zhang            &lu->mem_usage, &stat, &info);
2688914a3f7SHong Zhang #else
26975af56d4SHong Zhang   dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
27075af56d4SHong Zhang            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
27175af56d4SHong Zhang            &lu->mem_usage, &stat, &info);
2728914a3f7SHong Zhang #endif
27375af56d4SHong Zhang   ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
27475af56d4SHong Zhang   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
27575af56d4SHong Zhang 
276958c9bccSBarry Smith   if ( !info || info == lu->A.ncol+1 ) {
27775af56d4SHong Zhang     if ( lu->options.IterRefine ) {
2788914a3f7SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
2798914a3f7SHong Zhang       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
28075af56d4SHong Zhang       for (i = 0; i < 1; ++i)
2818914a3f7SHong Zhang         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr, berr);
28275af56d4SHong Zhang     }
2838914a3f7SHong Zhang   } else if ( info > 0 ){
2848914a3f7SHong Zhang     if ( lu->lwork == -1 ) {
28577431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
2868914a3f7SHong Zhang     } else {
28777431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
2888914a3f7SHong Zhang     }
2898914a3f7SHong Zhang   } else if (info < 0){
29077431f27SBarry Smith     SETERRQ2(PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
29114b396bbSKris Buschelman   }
29214b396bbSKris Buschelman 
2938914a3f7SHong Zhang   if ( lu->options.PrintStat ) {
2948914a3f7SHong Zhang     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
2958914a3f7SHong Zhang     StatPrint(&stat);
2968914a3f7SHong Zhang   }
29775af56d4SHong Zhang   StatFree(&stat);
29875af56d4SHong Zhang   PetscFunctionReturn(0);
29975af56d4SHong Zhang }
30014b396bbSKris Buschelman 
30114b396bbSKris Buschelman #undef __FUNCT__
302c29e884eSHong Zhang #define __FUNCT__ "MatSolve_SuperLU"
303c29e884eSHong Zhang PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
304c29e884eSHong Zhang {
305c29e884eSHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
306c29e884eSHong Zhang   PetscErrorCode ierr;
307c29e884eSHong Zhang 
308c29e884eSHong Zhang   PetscFunctionBegin;
309c29e884eSHong Zhang   lu->options.Trans = TRANS;
310c29e884eSHong Zhang   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
311c29e884eSHong Zhang   PetscFunctionReturn(0);
312c29e884eSHong Zhang }
313c29e884eSHong Zhang 
314c29e884eSHong Zhang #undef __FUNCT__
315c7c1fe80SHong Zhang #define __FUNCT__ "MatSolveTranspose_SuperLU"
316c7c1fe80SHong Zhang PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
317c7c1fe80SHong Zhang {
318c7c1fe80SHong Zhang   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
319c7c1fe80SHong Zhang   PetscErrorCode ierr;
320c7c1fe80SHong Zhang 
321c7c1fe80SHong Zhang   PetscFunctionBegin;
322c7c1fe80SHong Zhang   lu->options.Trans = NOTRANS;
323c29e884eSHong Zhang   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
324c7c1fe80SHong Zhang   PetscFunctionReturn(0);
325c7c1fe80SHong Zhang }
326c7c1fe80SHong Zhang 
32714b396bbSKris Buschelman 
32814b396bbSKris Buschelman /*
32914b396bbSKris Buschelman    Note the r permutation is ignored
33014b396bbSKris Buschelman */
33114b396bbSKris Buschelman #undef __FUNCT__
332f0c56d0fSKris Buschelman #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
3330481f469SBarry Smith PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
33414b396bbSKris Buschelman {
335719d5645SBarry Smith   Mat_SuperLU    *lu = (Mat_SuperLU*)((F)->spptr);
336b24902e0SBarry Smith   PetscErrorCode ierr;
337d0f46423SBarry Smith   PetscInt       m=A->rmap->n,n=A->cmap->n;
338b24902e0SBarry Smith 
339b24902e0SBarry Smith   PetscFunctionBegin;
340b24902e0SBarry Smith 
341b24902e0SBarry Smith   /* Allocate spaces (notice sizes are for the transpose) */
342b24902e0SBarry Smith   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr);
343b24902e0SBarry Smith   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr);
344b24902e0SBarry Smith   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr);
345b24902e0SBarry Smith   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->R);CHKERRQ(ierr);
346b24902e0SBarry Smith   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->C);CHKERRQ(ierr);
347b24902e0SBarry Smith 
348b24902e0SBarry Smith   /* create rhs and solution x without allocate space for .Store */
349b24902e0SBarry Smith #if defined(PETSC_USE_COMPLEX)
350b24902e0SBarry Smith   zCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
351b24902e0SBarry Smith   zCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_Z, SLU_GE);
352b24902e0SBarry Smith #else
353b24902e0SBarry Smith   dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
354b24902e0SBarry Smith   dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
355b24902e0SBarry Smith #endif
356b24902e0SBarry Smith 
357b24902e0SBarry Smith   lu->flg            = DIFFERENT_NONZERO_PATTERN;
358b24902e0SBarry Smith   lu->CleanUpSuperLU = PETSC_TRUE;
359719d5645SBarry Smith   (F)->ops->lufactornumeric  = MatLUFactorNumeric_SuperLU;
360b24902e0SBarry Smith   PetscFunctionReturn(0);
361b24902e0SBarry Smith }
362b24902e0SBarry Smith 
36335bd34faSBarry Smith EXTERN_C_BEGIN
36435bd34faSBarry Smith #undef __FUNCT__
36535bd34faSBarry Smith #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
36635bd34faSBarry Smith PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
36735bd34faSBarry Smith {
36835bd34faSBarry Smith   PetscFunctionBegin;
36935bd34faSBarry Smith   *type = MAT_SOLVER_SUPERLU;
37035bd34faSBarry Smith   PetscFunctionReturn(0);
37135bd34faSBarry Smith }
37235bd34faSBarry Smith EXTERN_C_END
37335bd34faSBarry Smith 
374b24902e0SBarry Smith 
375b24902e0SBarry Smith /*MC
3765c9eb25fSBarry Smith   MAT_SOLVER_SUPERLU = "superlu" - A matrix type providing direct solvers (LU) for sequential matrices
377b24902e0SBarry Smith   via the external package SuperLU.
378b24902e0SBarry Smith 
3795c9eb25fSBarry Smith   Use config/configure.py --download-superlu to have PETSc installed with SuperLU
380b24902e0SBarry Smith 
381b24902e0SBarry Smith   Options Database Keys:
3825c9eb25fSBarry Smith + -mat_superlu_ordering <0,1,2,3> - 0: natural ordering,
383b24902e0SBarry Smith                                     1: MMD applied to A'*A,
384b24902e0SBarry Smith                                     2: MMD applied to A'+A,
385b24902e0SBarry Smith                                     3: COLAMD, approximate minimum degree column ordering
386b24902e0SBarry Smith . -mat_superlu_iterrefine - have SuperLU do iterative refinement after the triangular solve
387b24902e0SBarry Smith                           choices: NOREFINE, SINGLE, DOUBLE, EXTRA; default is NOREFINE
388b24902e0SBarry Smith - -mat_superlu_printstat - print SuperLU statistics about the factorization
389b24902e0SBarry Smith 
3905c9eb25fSBarry Smith    Notes: Do not confuse this with MAT_SOLVER_SUPERLU_DIST which is for parallel sparse solves
3915c9eb25fSBarry Smith 
392b24902e0SBarry Smith    Level: beginner
393b24902e0SBarry Smith 
3945c9eb25fSBarry Smith .seealso: PCLU, MAT_SOLVER_SUPERLU_DIST, MAT_SOLVER_MUMPS, MAT_SOLVER_SPOOLES
395b24902e0SBarry Smith M*/
396b24902e0SBarry Smith 
3975c9eb25fSBarry Smith EXTERN_C_BEGIN
398b24902e0SBarry Smith #undef __FUNCT__
399b24902e0SBarry Smith #define __FUNCT__ "MatGetFactor_seqaij_superlu"
4005c9eb25fSBarry Smith PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
401b24902e0SBarry Smith {
40214b396bbSKris Buschelman   Mat            B;
403f0c56d0fSKris Buschelman   Mat_SuperLU    *lu;
4046849ba73SBarry Smith   PetscErrorCode ierr;
405e631078cSBarry Smith   PetscInt       indx;
4069ce50919SHong Zhang   PetscTruth     flg;
4075ca28756SHong Zhang   const char     *colperm[]={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
4085ca28756SHong Zhang   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
4095ca28756SHong Zhang   const char     *rowperm[]={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
41014b396bbSKris Buschelman 
41114b396bbSKris Buschelman   PetscFunctionBegin;
4127adad957SLisandro Dalcin   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
413d0f46423SBarry Smith   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
4147adad957SLisandro Dalcin   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
415899d7b4fSKris Buschelman   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
416f0c56d0fSKris Buschelman 
417b24902e0SBarry Smith   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU;
418b24902e0SBarry Smith   B->ops->destroy          = MatDestroy_SuperLU;
4193519fcdcSHong Zhang   B->ops->view             = MatView_SuperLU;
4205c9eb25fSBarry Smith   B->factor                = MAT_FACTOR_LU;
42194b7f48cSBarry Smith   B->assembled             = PETSC_TRUE;  /* required by -ksp_view */
4225c9eb25fSBarry Smith   B->preallocated          = PETSC_TRUE;
42314b396bbSKris Buschelman 
424b24902e0SBarry Smith   ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr);
4259ce50919SHong Zhang   set_default_options(&lu->options);
4268914a3f7SHong Zhang   /* equilibration causes error in solve(), thus not supported here. See dgssvx.c for possible reason. */
4278914a3f7SHong Zhang   lu->options.Equil = NO;
4289ce50919SHong Zhang   lu->options.PrintStat = NO;
4298914a3f7SHong Zhang   lu->lwork = 0;   /* allocate space internally by system malloc */
4309ce50919SHong Zhang 
4317adad957SLisandro Dalcin   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
4328914a3f7SHong Zhang     ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
4339ce50919SHong Zhang     if (flg) {lu->options.ColPerm = (colperm_t)indx;}
4348914a3f7SHong Zhang     ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
4359ce50919SHong Zhang     if (flg) { lu->options.IterRefine = (IterRefine_t)indx;}
4364dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_symmetricmode","SymmetricMode","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4379ce50919SHong Zhang     if (flg) lu->options.SymmetricMode = YES;
4388914a3f7SHong Zhang     ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&lu->options.DiagPivotThresh,PETSC_NULL);CHKERRQ(ierr);
4394dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_pivotgrowth","PivotGrowth","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4409ce50919SHong Zhang     if (flg) lu->options.PivotGrowth = YES;
4414dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_conditionnumber","ConditionNumber","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4429ce50919SHong Zhang     if (flg) lu->options.ConditionNumber = YES;
4438914a3f7SHong Zhang     ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[0],&indx,&flg);CHKERRQ(ierr);
4449ce50919SHong Zhang     if (flg) {lu->options.RowPerm = (rowperm_t)indx;}
4454dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4469ce50919SHong Zhang     if (flg) lu->options.ReplaceTinyPivot = YES;
4474dc4c822SBarry Smith     ierr = PetscOptionsTruth("-mat_superlu_printstat","PrintStat","None",PETSC_FALSE,&flg,0);CHKERRQ(ierr);
4489ce50919SHong Zhang     if (flg) lu->options.PrintStat = YES;
4498914a3f7SHong Zhang     ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,PETSC_NULL);CHKERRQ(ierr);
4505fe6150dSHong Zhang     if (lu->lwork > 0 ){
4515fe6150dSHong Zhang       ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
4525fe6150dSHong Zhang     } else if (lu->lwork != 0 && lu->lwork != -1){
45377431f27SBarry Smith       ierr = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
4548914a3f7SHong Zhang       lu->lwork = 0;
4558914a3f7SHong Zhang     }
4569ce50919SHong Zhang   PetscOptionsEnd();
4579ce50919SHong Zhang 
45875af56d4SHong Zhang #ifdef SUPERLU2
4595c9eb25fSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void(*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr);
46075af56d4SHong Zhang #endif
46135bd34faSBarry Smith   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_superlu",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
4625c9eb25fSBarry Smith   B->spptr = lu;
463899d7b4fSKris Buschelman   *F = B;
46414b396bbSKris Buschelman   PetscFunctionReturn(0);
46514b396bbSKris Buschelman }
4665c9eb25fSBarry Smith EXTERN_C_END
467