xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 9a6d0b0b105e1526bdcbd24334c9fc61d6648a0c)
1 
2 /*  --------------------------------------------------------------------
3 
4      This file implements a subclass of the SeqAIJ matrix class that uses
5      the SuperLU sparse solver. You can use this as a starting point for
6      implementing your own subclass of a PETSc matrix class.
7 
8      This demonstrates a way to make an implementation inheritence of a PETSc
9      matrix type. This means constructing a new matrix type (SuperLU) by changing some
10      of the methods of the previous type (SeqAIJ), adding additional data, and possibly
11      additional method. (See any book on object oriented programming).
12 */
13 
14 /*
15      Defines the data structure for the base matrix type (SeqAIJ)
16 */
17 #include <../src/mat/impls/aij/seq/aij.h>    /*I "petscmat.h" I*/
18 #include <petsc-private/vecimpl.h>           /* only to access vec->map */
19 
20 /*
21      SuperLU include files
22 */
23 EXTERN_C_BEGIN
24 #if defined(PETSC_USE_COMPLEX)
25 #if defined(PETSC_USE_REAL_SINGLE)
26 #include <slu_cdefs.h>
27 #else
28 #include <slu_zdefs.h>
29 #endif
30 #else
31 #if defined(PETSC_USE_REAL_SINGLE)
32 #include <slu_sdefs.h>
33 #else
34 #include <slu_ddefs.h>
35 #endif
36 #endif
37 #include <slu_util.h>
38 EXTERN_C_END
39 
40 /*
41      This is the data we are "ADDING" to the SeqAIJ matrix type to get the SuperLU matrix type
42 */
43 typedef struct {
44   SuperMatrix       A,L,U,B,X;
45   superlu_options_t options;
46   PetscInt          *perm_c; /* column permutation vector */
47   PetscInt          *perm_r; /* row permutations from partial pivoting */
48   PetscInt          *etree;
49   PetscReal         *R, *C;
50   char              equed[1];
51   PetscInt          lwork;
52   void              *work;
53   PetscReal         rpg, rcond;
54   mem_usage_t       mem_usage;
55   MatStructure      flg;
56   SuperLUStat_t     stat;
57   Mat               A_dup;
58   PetscScalar       *rhs_dup;
59 
60   /* Flag to clean up (non-global) SuperLU objects during Destroy */
61   PetscBool CleanUpSuperLU;
62 } Mat_SuperLU;
63 
64 extern PetscErrorCode MatFactorInfo_SuperLU(Mat,PetscViewer);
65 extern PetscErrorCode MatLUFactorNumeric_SuperLU(Mat,Mat,const MatFactorInfo*);
66 extern PetscErrorCode MatDestroy_SuperLU(Mat);
67 extern PetscErrorCode MatView_SuperLU(Mat,PetscViewer);
68 extern PetscErrorCode MatAssemblyEnd_SuperLU(Mat,MatAssemblyType);
69 extern PetscErrorCode MatSolve_SuperLU(Mat,Vec,Vec);
70 extern PetscErrorCode MatMatSolve_SuperLU(Mat,Mat,Mat);
71 extern PetscErrorCode MatSolveTranspose_SuperLU(Mat,Vec,Vec);
72 extern PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat,Mat,IS,IS,const MatFactorInfo*);
73 extern PetscErrorCode MatDuplicate_SuperLU(Mat, MatDuplicateOption, Mat*);
74 
75 /*
76     Utility function
77 */
78 #undef __FUNCT__
79 #define __FUNCT__ "MatFactorInfo_SuperLU"
80 PetscErrorCode MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
81 {
82   Mat_SuperLU       *lu= (Mat_SuperLU*)A->spptr;
83   PetscErrorCode    ierr;
84   superlu_options_t options;
85 
86   PetscFunctionBegin;
87   /* check if matrix is superlu_dist type */
88   if (A->ops->solve != MatSolve_SuperLU) PetscFunctionReturn(0);
89 
90   options = lu->options;
91 
92   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
93   ierr = PetscViewerASCIIPrintf(viewer,"  Equil: %s\n",(options.Equil != NO) ? "YES" : "NO");CHKERRQ(ierr);
94   ierr = PetscViewerASCIIPrintf(viewer,"  ColPerm: %D\n",options.ColPerm);CHKERRQ(ierr);
95   ierr = PetscViewerASCIIPrintf(viewer,"  IterRefine: %D\n",options.IterRefine);CHKERRQ(ierr);
96   ierr = PetscViewerASCIIPrintf(viewer,"  SymmetricMode: %s\n",(options.SymmetricMode != NO) ? "YES" : "NO");CHKERRQ(ierr);
97   ierr = PetscViewerASCIIPrintf(viewer,"  DiagPivotThresh: %g\n",options.DiagPivotThresh);CHKERRQ(ierr);
98   ierr = PetscViewerASCIIPrintf(viewer,"  PivotGrowth: %s\n",(options.PivotGrowth != NO) ? "YES" : "NO");CHKERRQ(ierr);
99   ierr = PetscViewerASCIIPrintf(viewer,"  ConditionNumber: %s\n",(options.ConditionNumber != NO) ? "YES" : "NO");CHKERRQ(ierr);
100   ierr = PetscViewerASCIIPrintf(viewer,"  RowPerm: %D\n",options.RowPerm);CHKERRQ(ierr);
101   ierr = PetscViewerASCIIPrintf(viewer,"  ReplaceTinyPivot: %s\n",(options.ReplaceTinyPivot != NO) ? "YES" : "NO");CHKERRQ(ierr);
102   ierr = PetscViewerASCIIPrintf(viewer,"  PrintStat: %s\n",(options.PrintStat != NO) ? "YES" : "NO");CHKERRQ(ierr);
103   ierr = PetscViewerASCIIPrintf(viewer,"  lwork: %D\n",lu->lwork);CHKERRQ(ierr);
104   if (A->factortype == MAT_FACTOR_ILU) {
105     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropTol: %g\n",options.ILU_DropTol);CHKERRQ(ierr);
106     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillTol: %g\n",options.ILU_FillTol);CHKERRQ(ierr);
107     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_FillFactor: %g\n",options.ILU_FillFactor);CHKERRQ(ierr);
108     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_DropRule: %D\n",options.ILU_DropRule);CHKERRQ(ierr);
109     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_Norm: %D\n",options.ILU_Norm);CHKERRQ(ierr);
110     ierr = PetscViewerASCIIPrintf(viewer,"  ILU_MILU: %D\n",options.ILU_MILU);CHKERRQ(ierr);
111   }
112   PetscFunctionReturn(0);
113 }
114 
115 /*
116     These are the methods provided to REPLACE the corresponding methods of the
117    SeqAIJ matrix class. Other methods of SeqAIJ are not replaced
118 */
119 #undef __FUNCT__
120 #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
121 PetscErrorCode MatLUFactorNumeric_SuperLU(Mat F,Mat A,const MatFactorInfo *info)
122 {
123   Mat_SuperLU    *lu = (Mat_SuperLU*)F->spptr;
124   Mat_SeqAIJ     *aa;
125   PetscErrorCode ierr;
126   PetscInt       sinfo;
127   PetscReal      ferr, berr;
128   NCformat       *Ustore;
129   SCformat       *Lstore;
130 
131   PetscFunctionBegin;
132   if (lu->flg == SAME_NONZERO_PATTERN) { /* successing numerical factorization */
133     lu->options.Fact = SamePattern;
134     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
135     Destroy_SuperMatrix_Store(&lu->A);
136     if (lu->options.Equil) {
137       ierr = MatCopy_SeqAIJ(A,lu->A_dup,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
138     }
139     if (lu->lwork >= 0) {
140       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
141       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
142       lu->options.Fact = SamePattern;
143     }
144   }
145 
146   /* Create the SuperMatrix for lu->A=A^T:
147        Since SuperLU likes column-oriented matrices,we pass it the transpose,
148        and then solve A^T X = B in MatSolve(). */
149   if (lu->options.Equil) {
150     aa = (Mat_SeqAIJ*)(lu->A_dup)->data;
151   } else {
152     aa = (Mat_SeqAIJ*)(A)->data;
153   }
154 #if defined(PETSC_USE_COMPLEX)
155 #if defined(PETSC_USE_REAL_SINGLE)
156   PetscStackCall("SuperLU:cCreate_CompCol_Matrix",cCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(singlecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_C,SLU_GE));
157 #else
158   PetscStackCall("SuperLU:zCreate_CompCol_Matrix",zCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,(doublecomplex*)aa->a,aa->j,aa->i,SLU_NC,SLU_Z,SLU_GE));
159 #endif
160 #else
161 #if defined(PETSC_USE_REAL_SINGLE)
162   PetscStackCall("SuperLU:sCreate_CompCol_Matrix",sCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_S,SLU_GE));
163 #else
164   PetscStackCall("SuperLU:dCreate_CompCol_Matrix",dCreate_CompCol_Matrix(&lu->A,A->cmap->n,A->rmap->n,aa->nz,aa->a,aa->j,aa->i,SLU_NC,SLU_D,SLU_GE));
165 #endif
166 #endif
167 
168   /* Numerical factorization */
169   lu->B.ncol = 0;  /* Indicate not to solve the system */
170   if (F->factortype == MAT_FACTOR_LU) {
171 #if defined(PETSC_USE_COMPLEX)
172 #if defined(PETSC_USE_REAL_SINGLE)
173     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
174                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
175                                      &lu->mem_usage, &lu->stat, &sinfo));
176 #else
177     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
178                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
179                                      &lu->mem_usage, &lu->stat, &sinfo));
180 #endif
181 #else
182 #if defined(PETSC_USE_REAL_SINGLE)
183     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
184                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
185                                      &lu->mem_usage, &lu->stat, &sinfo));
186 #else
187     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
188                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
189                                      &lu->mem_usage, &lu->stat, &sinfo));
190 #endif
191 #endif
192   } else if (F->factortype == MAT_FACTOR_ILU) {
193     /* Compute the incomplete factorization, condition number and pivot growth */
194 #if defined(PETSC_USE_COMPLEX)
195 #if defined(PETSC_USE_REAL_SINGLE)
196     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
197                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
198                                      &lu->mem_usage, &lu->stat, &sinfo));
199 #else
200     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r,lu->etree, lu->equed, lu->R, lu->C,
201                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
202                                      &lu->mem_usage, &lu->stat, &sinfo));
203 #endif
204 #else
205 #if defined(PETSC_USE_REAL_SINGLE)
206     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
207                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
208                                      &lu->mem_usage, &lu->stat, &sinfo));
209 #else
210     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
211                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
212                                      &lu->mem_usage, &lu->stat, &sinfo));
213 #endif
214 #endif
215   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
216   if (!sinfo || sinfo == lu->A.ncol+1) {
217     if (lu->options.PivotGrowth) {
218       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. pivot growth = %e\n", lu->rpg);
219     }
220     if (lu->options.ConditionNumber) {
221       ierr = PetscPrintf(PETSC_COMM_SELF,"  Recip. condition number = %e\n", lu->rcond);
222     }
223   } else if (sinfo > 0) {
224     if (lu->lwork == -1) {
225       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", sinfo - lu->A.ncol);
226     } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %D",sinfo);
227   } else SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", sinfo,-sinfo);
228 
229   if (lu->options.PrintStat) {
230     ierr = PetscPrintf(PETSC_COMM_SELF,"MatLUFactorNumeric_SuperLU():\n");
231     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
232     Lstore = (SCformat*) lu->L.Store;
233     Ustore = (NCformat*) lu->U.Store;
234     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor L = %D\n", Lstore->nnz);
235     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in factor U = %D\n", Ustore->nnz);
236     ierr   = PetscPrintf(PETSC_COMM_SELF,"  No of nonzeros in L+U = %D\n", Lstore->nnz + Ustore->nnz - lu->A.ncol);
237     ierr   = PetscPrintf(PETSC_COMM_SELF,"  L\\U MB %.3f\ttotal MB needed %.3f\n",
238                          lu->mem_usage.for_lu/1e6, lu->mem_usage.total_needed/1e6);
239   }
240 
241   lu->flg                = SAME_NONZERO_PATTERN;
242   F->ops->solve          = MatSolve_SuperLU;
243   F->ops->solvetranspose = MatSolveTranspose_SuperLU;
244   F->ops->matsolve       = MatMatSolve_SuperLU;
245   PetscFunctionReturn(0);
246 }
247 
248 #undef __FUNCT__
249 #define __FUNCT__ "MatDestroy_SuperLU"
250 PetscErrorCode MatDestroy_SuperLU(Mat A)
251 {
252   PetscErrorCode ierr;
253   Mat_SuperLU    *lu=(Mat_SuperLU*)A->spptr;
254 
255   PetscFunctionBegin;
256   if (lu && lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
257     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->A));
258     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->B));
259     PetscStackCall("SuperLU:Destroy_SuperMatrix_Store",Destroy_SuperMatrix_Store(&lu->X));
260     PetscStackCall("SuperLU:StatFree",StatFree(&lu->stat));
261     if (lu->lwork >= 0) {
262       PetscStackCall("SuperLU:Destroy_SuperNode_Matrix",Destroy_SuperNode_Matrix(&lu->L));
263       PetscStackCall("SuperLU:Destroy_CompCol_Matrix",Destroy_CompCol_Matrix(&lu->U));
264     }
265   }
266   if (lu) {
267     ierr = PetscFree(lu->etree);CHKERRQ(ierr);
268     ierr = PetscFree(lu->perm_r);CHKERRQ(ierr);
269     ierr = PetscFree(lu->perm_c);CHKERRQ(ierr);
270     ierr = PetscFree(lu->R);CHKERRQ(ierr);
271     ierr = PetscFree(lu->C);CHKERRQ(ierr);
272     ierr = PetscFree(lu->rhs_dup);CHKERRQ(ierr);
273     ierr = MatDestroy(&lu->A_dup);CHKERRQ(ierr);
274   }
275   ierr = PetscFree(A->spptr);CHKERRQ(ierr);
276 
277   /* clear composed functions */
278   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatFactorGetSolverPackage_C","",NULL);CHKERRQ(ierr);
279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)A,"MatSuperluSetILUDropTol_C","",NULL);CHKERRQ(ierr);
280 
281   ierr = MatDestroy_SeqAIJ(A);CHKERRQ(ierr);
282   PetscFunctionReturn(0);
283 }
284 
285 #undef __FUNCT__
286 #define __FUNCT__ "MatView_SuperLU"
287 PetscErrorCode MatView_SuperLU(Mat A,PetscViewer viewer)
288 {
289   PetscErrorCode    ierr;
290   PetscBool         iascii;
291   PetscViewerFormat format;
292 
293   PetscFunctionBegin;
294   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
295   if (iascii) {
296     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
297     if (format == PETSC_VIEWER_ASCII_INFO) {
298       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
299     }
300   }
301   PetscFunctionReturn(0);
302 }
303 
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSolve_SuperLU_Private"
307 PetscErrorCode MatSolve_SuperLU_Private(Mat A,Vec b,Vec x)
308 {
309   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
310   PetscScalar    *barray,*xarray;
311   PetscErrorCode ierr;
312   PetscInt       info,i,n=x->map->n;
313   PetscReal      ferr,berr;
314 
315   PetscFunctionBegin;
316   if (lu->lwork == -1) PetscFunctionReturn(0);
317 
318   lu->B.ncol = 1;   /* Set the number of right-hand side */
319   if (lu->options.Equil && !lu->rhs_dup) {
320     /* superlu overwrites b when Equil is used, thus create rhs_dup to keep user's b unchanged */
321     ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->rhs_dup);CHKERRQ(ierr);
322   }
323   if (lu->options.Equil) {
324     /* Copy b into rsh_dup */
325     ierr   = VecGetArray(b,&barray);CHKERRQ(ierr);
326     ierr   = PetscMemcpy(lu->rhs_dup,barray,n*sizeof(PetscScalar));CHKERRQ(ierr);
327     ierr   = VecRestoreArray(b,&barray);CHKERRQ(ierr);
328     barray = lu->rhs_dup;
329   } else {
330     ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
331   }
332   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
333 
334 #if defined(PETSC_USE_COMPLEX)
335 #if defined(PETSC_USE_REAL_SINGLE)
336   ((DNformat*)lu->B.Store)->nzval = (singlecomplex*)barray;
337   ((DNformat*)lu->X.Store)->nzval = (singlecomplex*)xarray;
338 #else
339   ((DNformat*)lu->B.Store)->nzval = (doublecomplex*)barray;
340   ((DNformat*)lu->X.Store)->nzval = (doublecomplex*)xarray;
341 #endif
342 #else
343   ((DNformat*)lu->B.Store)->nzval = barray;
344   ((DNformat*)lu->X.Store)->nzval = xarray;
345 #endif
346 
347   lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */
348   if (A->factortype == MAT_FACTOR_LU) {
349 #if defined(PETSC_USE_COMPLEX)
350 #if defined(PETSC_USE_REAL_SINGLE)
351     PetscStackCall("SuperLU:cgssvx",cgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
352                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
353                                      &lu->mem_usage, &lu->stat, &info));
354 #else
355     PetscStackCall("SuperLU:zgssvx",zgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
356                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
357                                      &lu->mem_usage, &lu->stat, &info));
358 #endif
359 #else
360 #if defined(PETSC_USE_REAL_SINGLE)
361     PetscStackCall("SuperLU:sgssvx",sgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
362                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
363                                      &lu->mem_usage, &lu->stat, &info));
364 #else
365     PetscStackCall("SuperLU:dgssvx",dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
366                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
367                                      &lu->mem_usage, &lu->stat, &info));
368 #endif
369 #endif
370   } else if (A->factortype == MAT_FACTOR_ILU) {
371 #if defined(PETSC_USE_COMPLEX)
372 #if defined(PETSC_USE_REAL_SINGLE)
373     PetscStackCall("SuperLU:cgsisx",cgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
374                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
375                                      &lu->mem_usage, &lu->stat, &info));
376 #else
377     PetscStackCall("SuperLU:zgsisx",zgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
378                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
379                                      &lu->mem_usage, &lu->stat, &info));
380 #endif
381 #else
382 #if defined(PETSC_USE_REAL_SINGLE)
383     PetscStackCall("SuperLU:sgsisx",sgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
384                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
385                                      &lu->mem_usage, &lu->stat, &info));
386 #else
387     PetscStackCall("SuperLU:dgsisx",dgsisx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
388                                      &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond,
389                                      &lu->mem_usage, &lu->stat, &info));
390 #endif
391 #endif
392   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
393   if (!lu->options.Equil) {
394     ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
395   }
396   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
397 
398   if (!info || info == lu->A.ncol+1) {
399     if (lu->options.IterRefine) {
400       ierr = PetscPrintf(PETSC_COMM_SELF,"Iterative Refinement:\n");
401       ierr = PetscPrintf(PETSC_COMM_SELF,"  %8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
402       for (i = 0; i < 1; ++i) {
403         ierr = PetscPrintf(PETSC_COMM_SELF,"  %8d%8d%16e%16e\n", i+1, lu->stat.RefineSteps, ferr, berr);
404       }
405     }
406   } else if (info > 0) {
407     if (lu->lwork == -1) {
408       ierr = PetscPrintf(PETSC_COMM_SELF,"  ** Estimated memory: %D bytes\n", info - lu->A.ncol);
409     } else {
410       ierr = PetscPrintf(PETSC_COMM_SELF,"  Warning: gssvx() returns info %D\n",info);
411     }
412   } else if (info < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %D, the %D-th argument in gssvx() had an illegal value", info,-info);
413 
414   if (lu->options.PrintStat) {
415     ierr = PetscPrintf(PETSC_COMM_SELF,"MatSolve__SuperLU():\n");
416     PetscStackCall("SuperLU:StatPrint",StatPrint(&lu->stat));
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatSolve_SuperLU"
423 PetscErrorCode MatSolve_SuperLU(Mat A,Vec b,Vec x)
424 {
425   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
426   PetscErrorCode ierr;
427 
428   PetscFunctionBegin;
429   lu->options.Trans = TRANS;
430 
431   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
432   PetscFunctionReturn(0);
433 }
434 
435 #undef __FUNCT__
436 #define __FUNCT__ "MatSolveTranspose_SuperLU"
437 PetscErrorCode MatSolveTranspose_SuperLU(Mat A,Vec b,Vec x)
438 {
439   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
440   PetscErrorCode ierr;
441 
442   PetscFunctionBegin;
443   lu->options.Trans = NOTRANS;
444 
445   ierr = MatSolve_SuperLU_Private(A,b,x);CHKERRQ(ierr);
446   PetscFunctionReturn(0);
447 }
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "MatMatSolve_SuperLU"
451 PetscErrorCode MatMatSolve_SuperLU(Mat A,Mat B,Mat X)
452 {
453   Mat_SuperLU    *lu = (Mat_SuperLU*)A->spptr;
454   PetscBool      flg;
455   PetscErrorCode ierr;
456 
457   PetscFunctionBegin;
458   ierr = PetscObjectTypeCompareAny((PetscObject)B,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
459   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix");
460   ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr);
461   if (!flg) SETERRQ(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix");
462   lu->options.Trans = TRANS;
463   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatMatSolve_SuperLU() is not implemented yet");
464   PetscFunctionReturn(0);
465 }
466 
467 /*
468    Note the r permutation is ignored
469 */
470 #undef __FUNCT__
471 #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
472 PetscErrorCode MatLUFactorSymbolic_SuperLU(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
473 {
474   Mat_SuperLU *lu = (Mat_SuperLU*)(F->spptr);
475 
476   PetscFunctionBegin;
477   lu->flg                 = DIFFERENT_NONZERO_PATTERN;
478   lu->CleanUpSuperLU      = PETSC_TRUE;
479   F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
480   PetscFunctionReturn(0);
481 }
482 
483 EXTERN_C_BEGIN
484 #undef __FUNCT__
485 #define __FUNCT__ "MatSuperluSetILUDropTol_SuperLU"
486 PetscErrorCode MatSuperluSetILUDropTol_SuperLU(Mat F,PetscReal dtol)
487 {
488   Mat_SuperLU *lu= (Mat_SuperLU*)F->spptr;
489 
490   PetscFunctionBegin;
491   lu->options.ILU_DropTol = dtol;
492   PetscFunctionReturn(0);
493 }
494 EXTERN_C_END
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatSuperluSetILUDropTol"
498 /*@
499   MatSuperluSetILUDropTol - Set SuperLU ILU drop tolerance
500    Logically Collective on Mat
501 
502    Input Parameters:
503 +  F - the factored matrix obtained by calling MatGetFactor() from PETSc-SuperLU interface
504 -  dtol - drop tolerance
505 
506   Options Database:
507 .   -mat_superlu_ilu_droptol <dtol>
508 
509    Level: beginner
510 
511    References: SuperLU Users' Guide
512 
513 .seealso: MatGetFactor()
514 @*/
515 PetscErrorCode MatSuperluSetILUDropTol(Mat F,PetscReal dtol)
516 {
517   PetscErrorCode ierr;
518 
519   PetscFunctionBegin;
520   PetscValidHeaderSpecific(F,MAT_CLASSID,1);
521   PetscValidLogicalCollectiveInt(F,dtol,2);
522   ierr = PetscTryMethod(F,"MatSuperluSetILUDropTol_C",(Mat,PetscReal),(F,dtol));CHKERRQ(ierr);
523   PetscFunctionReturn(0);
524 }
525 
526 EXTERN_C_BEGIN
527 #undef __FUNCT__
528 #define __FUNCT__ "MatFactorGetSolverPackage_seqaij_superlu"
529 PetscErrorCode MatFactorGetSolverPackage_seqaij_superlu(Mat A,const MatSolverPackage *type)
530 {
531   PetscFunctionBegin;
532   *type = MATSOLVERSUPERLU;
533   PetscFunctionReturn(0);
534 }
535 EXTERN_C_END
536 
537 
538 /*MC
539   MATSOLVERSUPERLU = "superlu" - A solver package providing solvers LU and ILU for sequential matrices
540   via the external package SuperLU.
541 
542   Use ./configure --download-superlu to have PETSc installed with SuperLU
543 
544   Options Database Keys:
545 + -mat_superlu_equil <FALSE>            - Equil (None)
546 . -mat_superlu_colperm <COLAMD>         - (choose one of) NATURAL MMD_ATA MMD_AT_PLUS_A COLAMD
547 . -mat_superlu_iterrefine <NOREFINE>    - (choose one of) NOREFINE SINGLE DOUBLE EXTRA
548 . -mat_superlu_symmetricmode: <FALSE>   - SymmetricMode (None)
549 . -mat_superlu_diagpivotthresh <1>      - DiagPivotThresh (None)
550 . -mat_superlu_pivotgrowth <FALSE>      - PivotGrowth (None)
551 . -mat_superlu_conditionnumber <FALSE>  - ConditionNumber (None)
552 . -mat_superlu_rowperm <NOROWPERM>      - (choose one of) NOROWPERM LargeDiag
553 . -mat_superlu_replacetinypivot <FALSE> - ReplaceTinyPivot (None)
554 . -mat_superlu_printstat <FALSE>        - PrintStat (None)
555 . -mat_superlu_lwork <0>                - size of work array in bytes used by factorization (None)
556 . -mat_superlu_ilu_droptol <0>          - ILU_DropTol (None)
557 . -mat_superlu_ilu_filltol <0>          - ILU_FillTol (None)
558 . -mat_superlu_ilu_fillfactor <0>       - ILU_FillFactor (None)
559 . -mat_superlu_ilu_droprull <0>         - ILU_DropRule (None)
560 . -mat_superlu_ilu_norm <0>             - ILU_Norm (None)
561 - -mat_superlu_ilu_milu <0>             - ILU_MILU (None)
562 
563    Notes: Do not confuse this with MATSOLVERSUPERLU_DIST which is for parallel sparse solves
564 
565    Level: beginner
566 
567 .seealso: PCLU, PCILU, MATSOLVERSUPERLU_DIST, MATSOLVERMUMPS, PCFactorSetMatSolverPackage(), MatSolverPackage
568 M*/
569 
570 EXTERN_C_BEGIN
571 #undef __FUNCT__
572 #define __FUNCT__ "MatGetFactor_seqaij_superlu"
573 PetscErrorCode MatGetFactor_seqaij_superlu(Mat A,MatFactorType ftype,Mat *F)
574 {
575   Mat            B;
576   Mat_SuperLU    *lu;
577   PetscErrorCode ierr;
578   PetscInt       indx,m=A->rmap->n,n=A->cmap->n;
579   PetscBool      flg;
580   PetscReal      real_input;
581   const char     *colperm[]   ={"NATURAL","MMD_ATA","MMD_AT_PLUS_A","COLAMD"}; /* MY_PERMC - not supported by the petsc interface yet */
582   const char     *iterrefine[]={"NOREFINE", "SINGLE", "DOUBLE", "EXTRA"};
583   const char     *rowperm[]   ={"NOROWPERM", "LargeDiag"}; /* MY_PERMC - not supported by the petsc interface yet */
584 
585   PetscFunctionBegin;
586   ierr = MatCreate(((PetscObject)A)->comm,&B);CHKERRQ(ierr);
587   ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
588   ierr = MatSetType(B,((PetscObject)A)->type_name);CHKERRQ(ierr);
589   ierr = MatSeqAIJSetPreallocation(B,0,NULL);CHKERRQ(ierr);
590 
591   if (ftype == MAT_FACTOR_LU || ftype == MAT_FACTOR_ILU) {
592     B->ops->lufactorsymbolic  = MatLUFactorSymbolic_SuperLU;
593     B->ops->ilufactorsymbolic = MatLUFactorSymbolic_SuperLU;
594   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor type not supported");
595 
596   B->ops->destroy = MatDestroy_SuperLU;
597   B->ops->view    = MatView_SuperLU;
598   B->factortype   = ftype;
599   B->assembled    = PETSC_TRUE;           /* required by -ksp_view */
600   B->preallocated = PETSC_TRUE;
601 
602   ierr = PetscNewLog(B,Mat_SuperLU,&lu);CHKERRQ(ierr);
603 
604   if (ftype == MAT_FACTOR_LU) {
605     set_default_options(&lu->options);
606     /* Comments from SuperLU_4.0/SRC/dgssvx.c:
607       "Whether or not the system will be equilibrated depends on the
608        scaling of the matrix A, but if equilibration is used, A is
609        overwritten by diag(R)*A*diag(C) and B by diag(R)*B
610        (if options->Trans=NOTRANS) or diag(C)*B (if options->Trans = TRANS or CONJ)."
611      We set 'options.Equil = NO' as default because additional space is needed for it.
612     */
613     lu->options.Equil = NO;
614   } else if (ftype == MAT_FACTOR_ILU) {
615     /* Set the default input options of ilu: */
616     PetscStackCall("SuperLU:ilu_set_default_options",ilu_set_default_options(&lu->options));
617   }
618   lu->options.PrintStat = NO;
619 
620   /* Initialize the statistics variables. */
621   PetscStackCall("SuperLU:StatInit",StatInit(&lu->stat));
622   lu->lwork = 0;   /* allocate space internally by system malloc */
623 
624   ierr = PetscOptionsBegin(((PetscObject)A)->comm,((PetscObject)A)->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
625   ierr = PetscOptionsBool("-mat_superlu_equil","Equil","None",(PetscBool)lu->options.Equil,(PetscBool*)&lu->options.Equil,0);CHKERRQ(ierr);
626   ierr = PetscOptionsEList("-mat_superlu_colperm","ColPerm","None",colperm,4,colperm[3],&indx,&flg);CHKERRQ(ierr);
627   if (flg) lu->options.ColPerm = (colperm_t)indx;
628   ierr = PetscOptionsEList("-mat_superlu_iterrefine","IterRefine","None",iterrefine,4,iterrefine[0],&indx,&flg);CHKERRQ(ierr);
629   if (flg) lu->options.IterRefine = (IterRefine_t)indx;
630   ierr = PetscOptionsBool("-mat_superlu_symmetricmode","SymmetricMode","None",(PetscBool)lu->options.SymmetricMode,&flg,0);CHKERRQ(ierr);
631   if (flg) lu->options.SymmetricMode = YES;
632   ierr = PetscOptionsReal("-mat_superlu_diagpivotthresh","DiagPivotThresh","None",lu->options.DiagPivotThresh,&real_input,&flg);CHKERRQ(ierr);
633   if (flg) lu->options.DiagPivotThresh = (double) real_input;
634   ierr = PetscOptionsBool("-mat_superlu_pivotgrowth","PivotGrowth","None",(PetscBool)lu->options.PivotGrowth,&flg,0);CHKERRQ(ierr);
635   if (flg) lu->options.PivotGrowth = YES;
636   ierr = PetscOptionsBool("-mat_superlu_conditionnumber","ConditionNumber","None",(PetscBool)lu->options.ConditionNumber,&flg,0);CHKERRQ(ierr);
637   if (flg) lu->options.ConditionNumber = YES;
638   ierr = PetscOptionsEList("-mat_superlu_rowperm","rowperm","None",rowperm,2,rowperm[lu->options.RowPerm],&indx,&flg);CHKERRQ(ierr);
639   if (flg) lu->options.RowPerm = (rowperm_t)indx;
640   ierr = PetscOptionsBool("-mat_superlu_replacetinypivot","ReplaceTinyPivot","None",(PetscBool)lu->options.ReplaceTinyPivot,&flg,0);CHKERRQ(ierr);
641   if (flg) lu->options.ReplaceTinyPivot = YES;
642   ierr = PetscOptionsBool("-mat_superlu_printstat","PrintStat","None",(PetscBool)lu->options.PrintStat,&flg,0);CHKERRQ(ierr);
643   if (flg) lu->options.PrintStat = YES;
644   ierr = PetscOptionsInt("-mat_superlu_lwork","size of work array in bytes used by factorization","None",lu->lwork,&lu->lwork,NULL);CHKERRQ(ierr);
645   if (lu->lwork > 0) {
646     ierr = PetscMalloc(lu->lwork,&lu->work);CHKERRQ(ierr);
647   } else if (lu->lwork != 0 && lu->lwork != -1) {
648     ierr      = PetscPrintf(PETSC_COMM_SELF,"   Warning: lwork %D is not supported by SUPERLU. The default lwork=0 is used.\n",lu->lwork);
649     lu->lwork = 0;
650   }
651   /* ilu options */
652   ierr = PetscOptionsReal("-mat_superlu_ilu_droptol","ILU_DropTol","None",lu->options.ILU_DropTol,&real_input,&flg);CHKERRQ(ierr);
653   if (flg) lu->options.ILU_DropTol = (double) real_input;
654   ierr = PetscOptionsReal("-mat_superlu_ilu_filltol","ILU_FillTol","None",lu->options.ILU_FillTol,&real_input,&flg);CHKERRQ(ierr);
655   if (flg) lu->options.ILU_FillTol = (double) real_input;
656   ierr = PetscOptionsReal("-mat_superlu_ilu_fillfactor","ILU_FillFactor","None",lu->options.ILU_FillFactor,&real_input,&flg);CHKERRQ(ierr);
657   if (flg) lu->options.ILU_FillFactor = (double) real_input;
658   ierr = PetscOptionsInt("-mat_superlu_ilu_droprull","ILU_DropRule","None",lu->options.ILU_DropRule,&lu->options.ILU_DropRule,NULL);CHKERRQ(ierr);
659   ierr = PetscOptionsInt("-mat_superlu_ilu_norm","ILU_Norm","None",lu->options.ILU_Norm,&indx,&flg);CHKERRQ(ierr);
660   if (flg) lu->options.ILU_Norm = (norm_t)indx;
661   ierr = PetscOptionsInt("-mat_superlu_ilu_milu","ILU_MILU","None",lu->options.ILU_MILU,&indx,&flg);CHKERRQ(ierr);
662   if (flg) lu->options.ILU_MILU = (milu_t)indx;
663   PetscOptionsEnd();
664   if (lu->options.Equil == YES) {
665     /* superlu overwrites input matrix and rhs when Equil is used, thus create A_dup to keep user's A unchanged */
666     ierr = MatDuplicate_SeqAIJ(A,MAT_COPY_VALUES,&lu->A_dup);CHKERRQ(ierr);
667   }
668 
669   /* Allocate spaces (notice sizes are for the transpose) */
670   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->etree);CHKERRQ(ierr);
671   ierr = PetscMalloc(n*sizeof(PetscInt),&lu->perm_r);CHKERRQ(ierr);
672   ierr = PetscMalloc(m*sizeof(PetscInt),&lu->perm_c);CHKERRQ(ierr);
673   ierr = PetscMalloc(n*sizeof(PetscScalar),&lu->R);CHKERRQ(ierr);
674   ierr = PetscMalloc(m*sizeof(PetscScalar),&lu->C);CHKERRQ(ierr);
675 
676   /* create rhs and solution x without allocate space for .Store */
677 #if defined(PETSC_USE_COMPLEX)
678 #if defined(PETSC_USE_REAL_SINGLE)
679   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
680   PetscStackCall("SuperLU:cCreate_Dense_Matrix(",cCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_C, SLU_GE));
681 #else
682   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
683   PetscStackCall("SuperLU:zCreate_Dense_Matrix",zCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_Z, SLU_GE));
684 #endif
685 #else
686 #if defined(PETSC_USE_REAL_SINGLE)
687   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
688   PetscStackCall("SuperLU:sCreate_Dense_Matrix",sCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_S, SLU_GE));
689 #else
690   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->B, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
691   PetscStackCall("SuperLU:dCreate_Dense_Matrix",dCreate_Dense_Matrix(&lu->X, m, 1, NULL, m, SLU_DN, SLU_D, SLU_GE));
692 #endif
693 #endif
694 
695 #if defined(SUPERLU2)
696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",(void (*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr);
697 #endif
698   ierr     = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatFactorGetSolverPackage_C","MatFactorGetSolverPackage_seqaij_superlu",MatFactorGetSolverPackage_seqaij_superlu);CHKERRQ(ierr);
699   ierr     = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSuperluSetILUDropTol_C","MatSuperluSetILUDropTol_SuperLU",MatSuperluSetILUDropTol_SuperLU);CHKERRQ(ierr);
700   B->spptr = lu;
701   *F       = B;
702   PetscFunctionReturn(0);
703 }
704 EXTERN_C_END
705 
706