xref: /petsc/src/mat/impls/aij/seq/superlu/superlu.c (revision 75af56d4b99cf21fbbeb8b3a463108682dd46af8)
114b396bbSKris Buschelman /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/
214b396bbSKris Buschelman 
314b396bbSKris Buschelman /*
4*75af56d4SHong Zhang         Provides an interface to the SuperLU 3.0 sparse solver
514b396bbSKris Buschelman */
614b396bbSKris Buschelman 
714b396bbSKris Buschelman #include "src/mat/impls/aij/seq/aij.h"
814b396bbSKris Buschelman 
914b396bbSKris Buschelman EXTERN_C_BEGIN
1014b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
1114b396bbSKris Buschelman #include "zsp_defs.h"
1214b396bbSKris Buschelman #else
1314b396bbSKris Buschelman #include "dsp_defs.h"
1414b396bbSKris Buschelman #endif
1514b396bbSKris Buschelman #include "util.h"
1614b396bbSKris Buschelman EXTERN_C_END
1714b396bbSKris Buschelman 
1814b396bbSKris Buschelman typedef struct {
19*75af56d4SHong Zhang   SuperMatrix       A,L,U,B,X;
20*75af56d4SHong Zhang   superlu_options_t options;
21*75af56d4SHong Zhang   int               *perm_c; /* column permutation vector */
22*75af56d4SHong Zhang   int               *perm_r; /* row permutations from partial pivoting */
23*75af56d4SHong Zhang   int               *etree;
24*75af56d4SHong Zhang   double            *R, *C;
25*75af56d4SHong Zhang   char              equed[1];
26*75af56d4SHong Zhang   int               lwork;
27*75af56d4SHong Zhang   void              *work;
28*75af56d4SHong Zhang   double            rpg, rcond;
29*75af56d4SHong Zhang   mem_usage_t       mem_usage;
30*75af56d4SHong Zhang   MatStructure      flg;
31*75af56d4SHong Zhang   /*
3214b396bbSKris Buschelman   SuperMatrix  A,B,AC,L,U;
3314b396bbSKris Buschelman   int          *perm_r,*perm_c,ispec,relax,panel_size;
3414b396bbSKris Buschelman   double       pivot_threshold;
3514b396bbSKris Buschelman   NCformat     *store;
3614b396bbSKris Buschelman   MatStructure flg;
3714b396bbSKris Buschelman   PetscTruth   SuperluMatOdering;
38*75af56d4SHong Zhang   */
3914b396bbSKris Buschelman 
4014b396bbSKris Buschelman   /* A few function pointers for inheritance */
41f0c56d0fSKris Buschelman   int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*);
4214b396bbSKris Buschelman   int (*MatView)(Mat,PetscViewer);
4314b396bbSKris Buschelman   int (*MatAssemblyEnd)(Mat,MatAssemblyType);
44b0592601SKris Buschelman   int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*);
4514b396bbSKris Buschelman   int (*MatDestroy)(Mat);
4614b396bbSKris Buschelman 
4714b396bbSKris Buschelman   /* Flag to clean up (non-global) SuperLU objects during Destroy */
4814b396bbSKris Buschelman   PetscTruth CleanUpSuperLU;
49f0c56d0fSKris Buschelman } Mat_SuperLU;
5014b396bbSKris Buschelman 
5114b396bbSKris Buschelman 
52f0c56d0fSKris Buschelman EXTERN int MatFactorInfo_SuperLU(Mat,PetscViewer);
53f0c56d0fSKris Buschelman EXTERN int MatLUFactorSymbolic_SuperLU(Mat,IS,IS,MatFactorInfo*,Mat*);
54b0592601SKris Buschelman 
55b0592601SKris Buschelman EXTERN_C_BEGIN
56b0592601SKris Buschelman EXTERN int MatConvert_SuperLU_SeqAIJ(Mat,MatType,Mat*);
57b0592601SKris Buschelman EXTERN int MatConvert_SeqAIJ_SuperLU(Mat,MatType,Mat*);
58b0592601SKris Buschelman EXTERN_C_END
5914b396bbSKris Buschelman 
6014b396bbSKris Buschelman #undef __FUNCT__
61f0c56d0fSKris Buschelman #define __FUNCT__ "MatDestroy_SuperLU"
62f0c56d0fSKris Buschelman int MatDestroy_SuperLU(Mat A)
6314b396bbSKris Buschelman {
64460c4903SKris Buschelman   int         ierr;
65f0c56d0fSKris Buschelman   Mat_SuperLU *lu = (Mat_SuperLU*)A->spptr;
6614b396bbSKris Buschelman 
6714b396bbSKris Buschelman   PetscFunctionBegin;
68*75af56d4SHong Zhang   if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */
69*75af56d4SHong Zhang     /* Destroy_CompCol_Matrix(&lu->A);  */  /* hangs inside memory.c! */
70*75af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->A);
71*75af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->B);
72*75af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->X);
73*75af56d4SHong Zhang     SUPERLU_FREE (lu->etree);
74*75af56d4SHong Zhang     SUPERLU_FREE (lu->perm_r);
75*75af56d4SHong Zhang     SUPERLU_FREE (lu->perm_c);
76*75af56d4SHong Zhang     SUPERLU_FREE (lu->R);
77*75af56d4SHong Zhang     SUPERLU_FREE (lu->C);
78*75af56d4SHong Zhang     if ( lu->lwork >= 0 ) {
79fb3e25aaSKris Buschelman       Destroy_SuperNode_Matrix(&lu->L);
80fb3e25aaSKris Buschelman       Destroy_CompCol_Matrix(&lu->U);
81*75af56d4SHong Zhang     }
82fb3e25aaSKris Buschelman   }
83b0592601SKris Buschelman   ierr = MatConvert_SuperLU_SeqAIJ(A,MATSEQAIJ,&A);CHKERRQ(ierr);
84fb3e25aaSKris Buschelman   ierr = (*A->ops->destroy)(A);CHKERRQ(ierr);
8514b396bbSKris Buschelman   PetscFunctionReturn(0);
8614b396bbSKris Buschelman }
8714b396bbSKris Buschelman 
8814b396bbSKris Buschelman #undef __FUNCT__
89f0c56d0fSKris Buschelman #define __FUNCT__ "MatView_SuperLU"
90f0c56d0fSKris Buschelman int MatView_SuperLU(Mat A,PetscViewer viewer)
9114b396bbSKris Buschelman {
9214b396bbSKris Buschelman   int               ierr;
9314b396bbSKris Buschelman   PetscTruth        isascii;
9414b396bbSKris Buschelman   PetscViewerFormat format;
95f0c56d0fSKris Buschelman   Mat_SuperLU       *lu=(Mat_SuperLU*)(A->spptr);
9614b396bbSKris Buschelman 
9714b396bbSKris Buschelman   PetscFunctionBegin;
9814b396bbSKris Buschelman   ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr);
9914b396bbSKris Buschelman 
10014b396bbSKris Buschelman   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
10114b396bbSKris Buschelman   if (isascii) {
10214b396bbSKris Buschelman     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
10314b396bbSKris Buschelman     if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
104f0c56d0fSKris Buschelman       ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr);
10514b396bbSKris Buschelman     }
10614b396bbSKris Buschelman   }
10714b396bbSKris Buschelman   PetscFunctionReturn(0);
10814b396bbSKris Buschelman }
10914b396bbSKris Buschelman 
11014b396bbSKris Buschelman #undef __FUNCT__
111f0c56d0fSKris Buschelman #define __FUNCT__ "MatAssemblyEnd_SuperLU"
112f0c56d0fSKris Buschelman int MatAssemblyEnd_SuperLU(Mat A,MatAssemblyType mode) {
11314b396bbSKris Buschelman   int         ierr;
114f0c56d0fSKris Buschelman   Mat_SuperLU *lu=(Mat_SuperLU*)(A->spptr);
11514b396bbSKris Buschelman 
11614b396bbSKris Buschelman   PetscFunctionBegin;
11714b396bbSKris Buschelman   ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr);
118b0592601SKris Buschelman 
119b0592601SKris Buschelman   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
120f0c56d0fSKris Buschelman   A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU;
12114b396bbSKris Buschelman   PetscFunctionReturn(0);
12214b396bbSKris Buschelman }
12314b396bbSKris Buschelman 
124*75af56d4SHong Zhang /* This function was written for SuperLU 2.0 by Matthew Knepley. Not tested for SuperLU 3.0! */
125*75af56d4SHong Zhang #ifdef SuperLU2
12614b396bbSKris Buschelman #include "src/mat/impls/dense/seq/dense.h"
12714b396bbSKris Buschelman #undef __FUNCT__
128f0c56d0fSKris Buschelman #define __FUNCT__ "MatCreateNull_SuperLU"
129f0c56d0fSKris Buschelman int MatCreateNull_SuperLU(Mat A,Mat *nullMat)
13014b396bbSKris Buschelman {
131f0c56d0fSKris Buschelman   Mat_SuperLU   *lu = (Mat_SuperLU*)A->spptr;
13214b396bbSKris Buschelman   int           numRows = A->m,numCols = A->n;
13314b396bbSKris Buschelman   SCformat      *Lstore;
13414b396bbSKris Buschelman   int           numNullCols,size;
135*75af56d4SHong Zhang   SuperLUStat_t stat;
13614b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
13714b396bbSKris Buschelman   doublecomplex *nullVals,*workVals;
13814b396bbSKris Buschelman #else
13914b396bbSKris Buschelman   PetscScalar   *nullVals,*workVals;
14014b396bbSKris Buschelman #endif
14114b396bbSKris Buschelman   int           row,newRow,col,newCol,block,b,ierr;
14214b396bbSKris Buschelman 
14314b396bbSKris Buschelman   PetscFunctionBegin;
14414b396bbSKris Buschelman   PetscValidHeaderSpecific(A,MAT_COOKIE);
14514b396bbSKris Buschelman   PetscValidPointer(nullMat);
14614b396bbSKris Buschelman   if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix");
14714b396bbSKris Buschelman   numNullCols = numCols - numRows;
14814b396bbSKris Buschelman   if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems");
14914b396bbSKris Buschelman   /* Create the null matrix */
15014b396bbSKris Buschelman   ierr = MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);CHKERRQ(ierr);
15114b396bbSKris Buschelman   if (numNullCols == 0) {
15214b396bbSKris Buschelman     ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15314b396bbSKris Buschelman     ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
15414b396bbSKris Buschelman     PetscFunctionReturn(0);
15514b396bbSKris Buschelman   }
15614b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
15714b396bbSKris Buschelman   nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v;
15814b396bbSKris Buschelman #else
15914b396bbSKris Buschelman   nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v;
16014b396bbSKris Buschelman #endif
16114b396bbSKris Buschelman   /* Copy in the columns */
16214b396bbSKris Buschelman   Lstore = (SCformat*)lu->L.Store;
16314b396bbSKris Buschelman   for(block = 0; block <= Lstore->nsuper; block++) {
16414b396bbSKris Buschelman     newRow = Lstore->sup_to_col[block];
16514b396bbSKris Buschelman     size   = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block];
16614b396bbSKris Buschelman     for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) {
16714b396bbSKris Buschelman       newCol = Lstore->rowind[col];
16814b396bbSKris Buschelman       if (newCol >= numRows) {
16914b396bbSKris Buschelman         for(b = 0; b < size; b++)
17014b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
17114b396bbSKris Buschelman           nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
17214b396bbSKris Buschelman #else
17314b396bbSKris Buschelman           nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col];
17414b396bbSKris Buschelman #endif
17514b396bbSKris Buschelman       }
17614b396bbSKris Buschelman     }
17714b396bbSKris Buschelman   }
17814b396bbSKris Buschelman   /* Permute rhs to form P^T_c B */
17914b396bbSKris Buschelman   ierr = PetscMalloc(numRows*sizeof(double),&workVals);CHKERRQ(ierr);
18014b396bbSKris Buschelman   for(b = 0; b < numNullCols; b++) {
18114b396bbSKris Buschelman     for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row];
18214b396bbSKris Buschelman     for(row = 0; row < numRows; row++) nullVals[b*numRows+row]   = workVals[row];
18314b396bbSKris Buschelman   }
18414b396bbSKris Buschelman   /* Backward solve the upper triangle A x = b */
18514b396bbSKris Buschelman   for(b = 0; b < numNullCols; b++) {
18614b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
187*75af56d4SHong Zhang     sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&stat,&ierr);
18814b396bbSKris Buschelman #else
189*75af56d4SHong Zhang     sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&stat,&ierr);
19014b396bbSKris Buschelman #endif
19114b396bbSKris Buschelman     if (ierr < 0)
19214b396bbSKris Buschelman       SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr);
19314b396bbSKris Buschelman   }
19414b396bbSKris Buschelman   ierr = PetscFree(workVals);CHKERRQ(ierr);
19514b396bbSKris Buschelman 
19614b396bbSKris Buschelman   ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
19714b396bbSKris Buschelman   ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
19814b396bbSKris Buschelman   PetscFunctionReturn(0);
19914b396bbSKris Buschelman }
200*75af56d4SHong Zhang #endif
20114b396bbSKris Buschelman 
20214b396bbSKris Buschelman #undef __FUNCT__
203f0c56d0fSKris Buschelman #define __FUNCT__ "MatSolve_SuperLU"
204f0c56d0fSKris Buschelman int MatSolve_SuperLU(Mat A,Vec b,Vec x)
20514b396bbSKris Buschelman {
206f0c56d0fSKris Buschelman   Mat_SuperLU   *lu = (Mat_SuperLU*)A->spptr;
207*75af56d4SHong Zhang   PetscScalar   *barray,*xarray;
208*75af56d4SHong Zhang   int           m,n,ierr,lwork,info,i;
209*75af56d4SHong Zhang   SuperLUStat_t stat;
210*75af56d4SHong Zhang   double        ferr,berr,*rhsb,*rhsx;
21114b396bbSKris Buschelman 
21214b396bbSKris Buschelman   PetscFunctionBegin;
213*75af56d4SHong Zhang   /* rhs vector */
214*75af56d4SHong Zhang   lu->B.ncol = 1;   /* Set the number of right-hand side */
215*75af56d4SHong Zhang   ierr = VecGetArray(b,&barray);CHKERRQ(ierr);
216*75af56d4SHong Zhang   ((DNformat*)lu->B.Store)->nzval = barray;
217*75af56d4SHong Zhang 
218*75af56d4SHong Zhang   /* solution vector */
219*75af56d4SHong Zhang   ierr = VecGetArray(x,&xarray);CHKERRQ(ierr);
220*75af56d4SHong Zhang   ((DNformat*)lu->X.Store)->nzval = xarray;
221*75af56d4SHong Zhang 
222*75af56d4SHong Zhang   /* Initialize the statistics variables. */
223*75af56d4SHong Zhang   StatInit(&stat);
224*75af56d4SHong Zhang 
225*75af56d4SHong Zhang   lu->options.Fact  = FACTORED; /* Indicate the factored form of A is supplied. */
226*75af56d4SHong Zhang   lu->options.Trans = TRANS;
227*75af56d4SHong Zhang   dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
228*75af56d4SHong Zhang            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
229*75af56d4SHong Zhang            &lu->mem_usage, &stat, &info);
230*75af56d4SHong Zhang 
231*75af56d4SHong Zhang   ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr);
232*75af56d4SHong Zhang   ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr);
233*75af56d4SHong Zhang 
234*75af56d4SHong Zhang   if ( info == 0 || info == lu->A.ncol+1 ) {
235*75af56d4SHong Zhang     if ( lu->options.IterRefine ) {
236*75af56d4SHong Zhang       printf("Iterative Refinement:\n");
237*75af56d4SHong Zhang       printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR");
238*75af56d4SHong Zhang       for (i = 0; i < 1; ++i)
239*75af56d4SHong Zhang         printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr, berr);
240*75af56d4SHong Zhang     }
241*75af56d4SHong Zhang     fflush(stdout);
242*75af56d4SHong Zhang   } else if ( info > 0 && lu->lwork == -1 ) {
243*75af56d4SHong Zhang     printf("** Estimated memory: %d bytes\n", info - n);
24414b396bbSKris Buschelman   }
24514b396bbSKris Buschelman 
246*75af56d4SHong Zhang   if ( lu->options.PrintStat ) StatPrint(&stat);
247*75af56d4SHong Zhang   StatFree(&stat);
248*75af56d4SHong Zhang   PetscFunctionReturn(0);
249*75af56d4SHong Zhang }
25014b396bbSKris Buschelman 
25114b396bbSKris Buschelman #undef __FUNCT__
252f0c56d0fSKris Buschelman #define __FUNCT__ "MatLUFactorNumeric_SuperLU"
253f0c56d0fSKris Buschelman int MatLUFactorNumeric_SuperLU(Mat A,Mat *F)
25414b396bbSKris Buschelman {
25514b396bbSKris Buschelman   Mat_SeqAIJ    *aa = (Mat_SeqAIJ*)(A)->data;
256f0c56d0fSKris Buschelman   Mat_SuperLU   *lu = (Mat_SuperLU*)(*F)->spptr;
257*75af56d4SHong Zhang   int           ierr,info;
25814b396bbSKris Buschelman   PetscTruth    flag;
259*75af56d4SHong Zhang   SuperLUStat_t stat;
260*75af56d4SHong Zhang   double        ferr, berr;
26114b396bbSKris Buschelman 
26214b396bbSKris Buschelman   PetscFunctionBegin;
263*75af56d4SHong Zhang   if (lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */
264*75af56d4SHong Zhang     /* Set SuperLU options */
265*75af56d4SHong Zhang     /* the default values for options argument:
266*75af56d4SHong Zhang 	options.Fact = DOFACT;
267*75af56d4SHong Zhang         options.Equil = YES;
268*75af56d4SHong Zhang     	options.ColPerm = COLAMD;
269*75af56d4SHong Zhang 	options.DiagPivotThresh = 1.0;
270*75af56d4SHong Zhang     	options.Trans = NOTRANS;
271*75af56d4SHong Zhang     	options.IterRefine = NOREFINE;
272*75af56d4SHong Zhang     	options.SymmetricMode = NO;
273*75af56d4SHong Zhang     	options.PivotGrowth = NO;
274*75af56d4SHong Zhang     	options.ConditionNumber = NO;
275*75af56d4SHong Zhang     	options.PrintStat = YES;
27614b396bbSKris Buschelman     */
277*75af56d4SHong Zhang     set_default_options(&lu->options);
278*75af56d4SHong Zhang     lu->options.Equil = NO;  /* equilibration causes error in solve */
279*75af56d4SHong Zhang   } else { /* successing numerical factorization */
280*75af56d4SHong Zhang     /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */
281*75af56d4SHong Zhang     Destroy_SuperMatrix_Store(&lu->A);
282*75af56d4SHong Zhang     if ( lu->lwork >= 0 ) { /* Deallocate storage associated with L and U. */
283*75af56d4SHong Zhang       Destroy_SuperNode_Matrix(&lu->L);
284*75af56d4SHong Zhang       Destroy_CompCol_Matrix(&lu->U);
285*75af56d4SHong Zhang       lu->options.Fact = SamePattern;
28614b396bbSKris Buschelman     }
287*75af56d4SHong Zhang   }
28814b396bbSKris Buschelman 
289*75af56d4SHong Zhang   /* Create the SuperMatrix for lu->A=A^T:
290*75af56d4SHong Zhang        Since SuperLU likes column-oriented matrices,we pass it the transpose,
291*75af56d4SHong Zhang        and then solve A^T X = B in MatSolve(). */
292*75af56d4SHong Zhang #if defined(PETSC_USE_COMPLEX)
293*75af56d4SHong Zhang   zCreate_CompCol_Matrix(&lu->A,A->n,A->m,aa->nz,aa->a,aa->j,aa->i,
294*75af56d4SHong Zhang                            SLU_NC,SLU_Z,SLU_GE);
295*75af56d4SHong Zhang #else
296*75af56d4SHong Zhang   dCreate_CompCol_Matrix(&lu->A,A->n,A->m,aa->nz,aa->a,aa->j,aa->i,
297*75af56d4SHong Zhang                            SLU_NC,SLU_D,SLU_GE);
298*75af56d4SHong Zhang #endif
299*75af56d4SHong Zhang 
300*75af56d4SHong Zhang   /* Initialize the statistics variables. */
301*75af56d4SHong Zhang   StatInit(&stat);
302*75af56d4SHong Zhang 
303*75af56d4SHong Zhang   lu->lwork = 0;   /* allocate space internally by system malloc */
304*75af56d4SHong Zhang   lu->B.ncol = 0;  /* Indicate not to solve the system */
305*75af56d4SHong Zhang   dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C,
306*75af56d4SHong Zhang            &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr,
307*75af56d4SHong Zhang            &lu->mem_usage, &stat, &info);
308*75af56d4SHong Zhang 
309*75af56d4SHong Zhang   if ( info == 0 || info == lu->A.ncol+1 ) {
310*75af56d4SHong Zhang     if ( lu->options.PivotGrowth ) printf("Recip. pivot growth = %e\n", lu->rpg);
311*75af56d4SHong Zhang     if ( lu->options.ConditionNumber )
312*75af56d4SHong Zhang       printf("Recip. condition number = %e\n", lu->rcond);
313*75af56d4SHong Zhang         /*
314*75af56d4SHong Zhang           NCformat       *Ustore;
315*75af56d4SHong Zhang           SCformat       *Lstore;
316*75af56d4SHong Zhang         Lstore = (SCformat *) lu->L.Store;
317*75af56d4SHong Zhang         Ustore = (NCformat *) lu->U.Store;
318*75af56d4SHong Zhang 	printf("No of nonzeros in factor L = %d\n", Lstore->nnz);
319*75af56d4SHong Zhang     	printf("No of nonzeros in factor U = %d\n", Ustore->nnz);
320*75af56d4SHong Zhang     	printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n);
321*75af56d4SHong Zhang 	printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n",
322*75af56d4SHong Zhang 	       mem_usage.for_lu/1e6, mem_usage.total_needed/1e6,
323*75af56d4SHong Zhang 	       mem_usage.expansions);
324*75af56d4SHong Zhang         */
325*75af56d4SHong Zhang     fflush(stdout);
326*75af56d4SHong Zhang 
327*75af56d4SHong Zhang   } else if ( info > 0 && lu->lwork == -1 ) {
328*75af56d4SHong Zhang     printf("** Estimated memory: %d bytes\n", info - lu->A.ncol);
329*75af56d4SHong Zhang   }
330*75af56d4SHong Zhang 
331*75af56d4SHong Zhang   if ( lu->options.PrintStat ) StatPrint(&stat);
332*75af56d4SHong Zhang   StatFree(&stat);
333*75af56d4SHong Zhang 
334*75af56d4SHong Zhang   lu->flg = SAME_NONZERO_PATTERN;
335*75af56d4SHong Zhang   PetscFunctionReturn(0);
336*75af56d4SHong Zhang 
337*75af56d4SHong Zhang #ifdef OLD
33814b396bbSKris Buschelman   /* Set SuperLU options */
33914b396bbSKris Buschelman   lu->relax      = sp_ienv(2);
34014b396bbSKris Buschelman   lu->panel_size = sp_ienv(1);
34114b396bbSKris Buschelman   /* We have to initialize global data or SuperLU crashes (sucky design) */
34214b396bbSKris Buschelman   if (!StatInitCalled) {
34314b396bbSKris Buschelman     StatInit(lu->panel_size,lu->relax);
34414b396bbSKris Buschelman   }
34514b396bbSKris Buschelman   StatInitCalled++;
34614b396bbSKris Buschelman 
34714b396bbSKris Buschelman   ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");CHKERRQ(ierr);
34814b396bbSKris Buschelman   /* use SuperLU mat ordeing */
34914b396bbSKris Buschelman   ierr = PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)\n   0: natural ordering;\n   1: MMD applied to A'*A;\n   2: MMD applied to A'+A;\n   3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);CHKERRQ(ierr);
35014b396bbSKris Buschelman   if (flag) {
35114b396bbSKris Buschelman     get_perm_c(lu->ispec, &lu->A, lu->perm_c);
35214b396bbSKris Buschelman     lu->SuperluMatOdering = PETSC_TRUE;
35314b396bbSKris Buschelman   }
35414b396bbSKris Buschelman   PetscOptionsEnd();
35514b396bbSKris Buschelman 
35614b396bbSKris Buschelman   /* Create the elimination tree */
35714b396bbSKris Buschelman   ierr = PetscMalloc(A->n*sizeof(int),&etree);CHKERRQ(ierr);
35814b396bbSKris Buschelman   sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC);
35914b396bbSKris Buschelman   /* Factor the matrix */
36014b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX)
36114b396bbSKris Buschelman   zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
36214b396bbSKris Buschelman #else
36314b396bbSKris Buschelman   dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr);
36414b396bbSKris Buschelman #endif
36514b396bbSKris Buschelman   if (ierr < 0) {
36614b396bbSKris Buschelman     SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr);
36714b396bbSKris Buschelman   } else if (ierr > 0) {
36814b396bbSKris Buschelman     if (ierr <= A->m) {
36914b396bbSKris Buschelman       SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr);
37014b396bbSKris Buschelman     } else {
37114b396bbSKris Buschelman       SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m);
37214b396bbSKris Buschelman     }
37314b396bbSKris Buschelman   }
37414b396bbSKris Buschelman 
37514b396bbSKris Buschelman   /* Cleanup */
37614b396bbSKris Buschelman   ierr = PetscFree(etree);CHKERRQ(ierr);
377*75af56d4SHong Zhang #endif /* OLD */
37814b396bbSKris Buschelman 
379*75af56d4SHong Zhang 
38014b396bbSKris Buschelman }
38114b396bbSKris Buschelman 
38214b396bbSKris Buschelman /*
38314b396bbSKris Buschelman    Note the r permutation is ignored
38414b396bbSKris Buschelman */
38514b396bbSKris Buschelman #undef __FUNCT__
386f0c56d0fSKris Buschelman #define __FUNCT__ "MatLUFactorSymbolic_SuperLU"
387f0c56d0fSKris Buschelman int MatLUFactorSymbolic_SuperLU(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F)
38814b396bbSKris Buschelman {
38914b396bbSKris Buschelman   Mat          B;
390f0c56d0fSKris Buschelman   Mat_SuperLU  *lu;
391*75af56d4SHong Zhang   int          ierr,m=A->m,n=A->n;     /* *ca; */
39214b396bbSKris Buschelman 
39314b396bbSKris Buschelman   PetscFunctionBegin;
39414b396bbSKris Buschelman 
395899d7b4fSKris Buschelman   ierr = MatCreate(A->comm,A->m,A->n,PETSC_DETERMINE,PETSC_DETERMINE,&B);CHKERRQ(ierr);
396899d7b4fSKris Buschelman   ierr = MatSetType(B,MATSUPERLU);CHKERRQ(ierr);
397899d7b4fSKris Buschelman   ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr);
398f0c56d0fSKris Buschelman 
399f0c56d0fSKris Buschelman   B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU;
400f0c56d0fSKris Buschelman   B->ops->solve           = MatSolve_SuperLU;
40114b396bbSKris Buschelman   B->factor               = FACTOR_LU;
402899d7b4fSKris Buschelman   B->assembled            = PETSC_TRUE;  /* required by -sles_view */
40314b396bbSKris Buschelman 
404f0c56d0fSKris Buschelman   lu = (Mat_SuperLU*)(B->spptr);
405*75af56d4SHong Zhang #ifdef SUPERLU2
4062ecb5974SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU",
407f0c56d0fSKris Buschelman                                     (void(*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr);
408*75af56d4SHong Zhang #endif
409*75af56d4SHong Zhang #ifdef OLD
41014b396bbSKris Buschelman   /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */
41114b396bbSKris Buschelman   ierr = PetscMalloc(A->n*sizeof(int),&lu->perm_r);CHKERRQ(ierr);
41214b396bbSKris Buschelman   ierr = PetscMalloc(A->m*sizeof(int),&lu->perm_c);CHKERRQ(ierr);
41314b396bbSKris Buschelman 
41414b396bbSKris Buschelman   /* use PETSc mat ordering */
41514b396bbSKris Buschelman   ierr = ISGetIndices(c,&ca);CHKERRQ(ierr);
41614b396bbSKris Buschelman   ierr = PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));CHKERRQ(ierr);
41714b396bbSKris Buschelman   ierr = ISRestoreIndices(c,&ca);CHKERRQ(ierr);
41814b396bbSKris Buschelman   lu->SuperluMatOdering = PETSC_FALSE;
41914b396bbSKris Buschelman 
42014b396bbSKris Buschelman   lu->pivot_threshold = info->dtcol;
421f0c56d0fSKris Buschelman   PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SuperLU));
422*75af56d4SHong Zhang #endif
423*75af56d4SHong Zhang   /* Allocate spaces (notice sizes are for the transpose) */
424*75af56d4SHong Zhang   int            *perm_c; /* column permutation vector */
425*75af56d4SHong Zhang   int            *perm_r; /* row permutations from partial pivoting */
426*75af56d4SHong Zhang   if ( !(lu->etree = intMalloc(m)) ) ABORT("Malloc fails for etree[].");
427*75af56d4SHong Zhang   if ( !(lu->perm_r = intMalloc(n)) ) ABORT("Malloc fails for perm_r[].");
428*75af56d4SHong Zhang   if ( !(lu->perm_c = intMalloc(m)) ) ABORT("Malloc fails for perm_c[].");
429*75af56d4SHong Zhang   if ( !(lu->R = (double *) SUPERLU_MALLOC(n * sizeof(double))) )
430*75af56d4SHong Zhang     ABORT("SUPERLU_MALLOC fails for R[].");
431*75af56d4SHong Zhang   if ( !(lu->C = (double *) SUPERLU_MALLOC(m * sizeof(double))) )
432*75af56d4SHong Zhang     ABORT("SUPERLU_MALLOC fails for C[].");
433*75af56d4SHong Zhang 
434*75af56d4SHong Zhang   /* create rhs and solution x without allocate space for Store */
435*75af56d4SHong Zhang   dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
436*75af56d4SHong Zhang   dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE);
43714b396bbSKris Buschelman 
43814b396bbSKris Buschelman   lu->flg            = DIFFERENT_NONZERO_PATTERN;
439e740cb95SKris Buschelman   lu->CleanUpSuperLU = PETSC_TRUE;
440899d7b4fSKris Buschelman 
441899d7b4fSKris Buschelman   *F = B;
44214b396bbSKris Buschelman   PetscFunctionReturn(0);
44314b396bbSKris Buschelman }
44414b396bbSKris Buschelman 
44514b396bbSKris Buschelman /* used by -sles_view */
44614b396bbSKris Buschelman #undef __FUNCT__
447f0c56d0fSKris Buschelman #define __FUNCT__ "MatFactorInfo_SuperLU"
448f0c56d0fSKris Buschelman int MatFactorInfo_SuperLU(Mat A,PetscViewer viewer)
44914b396bbSKris Buschelman {
450f0c56d0fSKris Buschelman   Mat_SuperLU *lu= (Mat_SuperLU*)A->spptr;
451f0c56d0fSKris Buschelman   int         ierr;
452f0c56d0fSKris Buschelman 
453f0c56d0fSKris Buschelman   PetscFunctionBegin;
454f0c56d0fSKris Buschelman   ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr);
455*75af56d4SHong Zhang #ifdef OLD
456f0c56d0fSKris Buschelman   if(lu->SuperluMatOdering) {
457f0c56d0fSKris Buschelman     ierr = PetscViewerASCIIPrintf(viewer,"  SuperLU mat ordering: %d\n",lu->ispec);CHKERRQ(ierr);
458f0c56d0fSKris Buschelman   }
459*75af56d4SHong Zhang #endif
460f0c56d0fSKris Buschelman   PetscFunctionReturn(0);
461f0c56d0fSKris Buschelman }
462f0c56d0fSKris Buschelman 
463f0c56d0fSKris Buschelman #undef __FUNCT__
464f0c56d0fSKris Buschelman #define __FUNCT__ "MatDuplicate_SuperLU"
465f0c56d0fSKris Buschelman int MatDuplicate_SuperLU(Mat A, MatDuplicateOption op, Mat *M) {
46614b396bbSKris Buschelman   int         ierr;
4678f340917SKris Buschelman   Mat_SuperLU *lu=(Mat_SuperLU *)A->spptr;
4688f340917SKris Buschelman 
46914b396bbSKris Buschelman   PetscFunctionBegin;
4708f340917SKris Buschelman   ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr);
471f0c56d0fSKris Buschelman   ierr = MatConvert_SeqAIJ_SuperLU(*M,MATSUPERLU,M);CHKERRQ(ierr);
4723f953163SKris Buschelman   ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU));CHKERRQ(ierr);
47314b396bbSKris Buschelman   PetscFunctionReturn(0);
47414b396bbSKris Buschelman }
47514b396bbSKris Buschelman 
47614b396bbSKris Buschelman EXTERN_C_BEGIN
47714b396bbSKris Buschelman #undef __FUNCT__
478b0592601SKris Buschelman #define __FUNCT__ "MatConvert_SuperLU_SeqAIJ"
479b0592601SKris Buschelman int MatConvert_SuperLU_SeqAIJ(Mat A,MatType type,Mat *newmat) {
480fb3e25aaSKris Buschelman   /* This routine is only called to convert an unfactored PETSc-SuperLU matrix */
481fb3e25aaSKris Buschelman   /* to its base PETSc type, so we will ignore 'MatType type'. */
48214b396bbSKris Buschelman   int                  ierr;
483b0592601SKris Buschelman   Mat                  B=*newmat;
484f0c56d0fSKris Buschelman   Mat_SuperLU   *lu=(Mat_SuperLU *)A->spptr;
485b0592601SKris Buschelman 
486b0592601SKris Buschelman   PetscFunctionBegin;
487b0592601SKris Buschelman   if (B != A) {
488b0592601SKris Buschelman     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
489f0c56d0fSKris Buschelman   }
490b0592601SKris Buschelman   /* Reset the original function pointers */
491f0c56d0fSKris Buschelman   B->ops->duplicate        = lu->MatDuplicate;
492b0592601SKris Buschelman   B->ops->view             = lu->MatView;
493b0592601SKris Buschelman   B->ops->assemblyend      = lu->MatAssemblyEnd;
494b0592601SKris Buschelman   B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic;
495b0592601SKris Buschelman   B->ops->destroy          = lu->MatDestroy;
496b0592601SKris Buschelman   /* lu is only a function pointer stash unless we've factored the matrix, which we haven't! */
497b0592601SKris Buschelman   ierr = PetscFree(lu);CHKERRQ(ierr);
498b0592601SKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);CHKERRQ(ierr);
499b0592601SKris Buschelman   *newmat = B;
500b0592601SKris Buschelman   PetscFunctionReturn(0);
501b0592601SKris Buschelman }
502b0592601SKris Buschelman EXTERN_C_END
503b0592601SKris Buschelman 
504b0592601SKris Buschelman EXTERN_C_BEGIN
505b0592601SKris Buschelman #undef __FUNCT__
506b0592601SKris Buschelman #define __FUNCT__ "MatConvert_SeqAIJ_SuperLU"
507b0592601SKris Buschelman int MatConvert_SeqAIJ_SuperLU(Mat A,MatType type,Mat *newmat) {
508fb3e25aaSKris Buschelman   /* This routine is only called to convert to MATSUPERLU */
509fb3e25aaSKris Buschelman   /* from MATSEQAIJ, so we will ignore 'MatType type'. */
510b0592601SKris Buschelman   int         ierr;
511b0592601SKris Buschelman   Mat         B=*newmat;
512f0c56d0fSKris Buschelman   Mat_SuperLU *lu;
51314b396bbSKris Buschelman 
51414b396bbSKris Buschelman   PetscFunctionBegin;
515b0592601SKris Buschelman   if (B != A) {
516b0592601SKris Buschelman     ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
517b0592601SKris Buschelman   }
51814b396bbSKris Buschelman 
519f0c56d0fSKris Buschelman   ierr = PetscNew(Mat_SuperLU,&lu);CHKERRQ(ierr);
520f0c56d0fSKris Buschelman   lu->MatDuplicate         = A->ops->duplicate;
52114b396bbSKris Buschelman   lu->MatView              = A->ops->view;
52214b396bbSKris Buschelman   lu->MatAssemblyEnd       = A->ops->assemblyend;
523b0592601SKris Buschelman   lu->MatLUFactorSymbolic  = A->ops->lufactorsymbolic;
52414b396bbSKris Buschelman   lu->MatDestroy           = A->ops->destroy;
52514b396bbSKris Buschelman   lu->CleanUpSuperLU       = PETSC_FALSE;
526b0592601SKris Buschelman 
527b0592601SKris Buschelman   B->spptr                 = (void*)lu;
528f0c56d0fSKris Buschelman   B->ops->duplicate        = MatDuplicate_SuperLU;
529f0c56d0fSKris Buschelman   B->ops->view             = MatView_SuperLU;
530f0c56d0fSKris Buschelman   B->ops->assemblyend      = MatAssemblyEnd_SuperLU;
531f0c56d0fSKris Buschelman   B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU;
532f0c56d0fSKris Buschelman   B->ops->destroy          = MatDestroy_SuperLU;
533b0592601SKris Buschelman 
534b0592601SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_C",
535b0592601SKris Buschelman                                            "MatConvert_SeqAIJ_SuperLU",MatConvert_SeqAIJ_SuperLU);CHKERRQ(ierr);
536b0592601SKris Buschelman   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_seqaij_C",
537b0592601SKris Buschelman                                            "MatConvert_SuperLU_SeqAIJ",MatConvert_SuperLU_SeqAIJ);CHKERRQ(ierr);
538fb3e25aaSKris Buschelman   PetscLogInfo(0,"Using SuperLU for SeqAIJ LU factorization and solves.");
539fb3e25aaSKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU);CHKERRQ(ierr);
540b0592601SKris Buschelman   *newmat = B;
541b0592601SKris Buschelman   PetscFunctionReturn(0);
542b0592601SKris Buschelman }
543b0592601SKris Buschelman EXTERN_C_END
544b0592601SKris Buschelman 
54524b6179bSKris Buschelman /*MC
546fafad747SKris Buschelman   MATSUPERLU - MATSUPERLU = "superlu" - A matrix type providing direct solvers (LU) for sequential matrices
54724b6179bSKris Buschelman   via the external package SuperLU.
54824b6179bSKris Buschelman 
54924b6179bSKris Buschelman   If SuperLU is installed (see the manual for
55024b6179bSKris Buschelman   instructions on how to declare the existence of external packages),
55124b6179bSKris Buschelman   a matrix type can be constructed which invokes SuperLU solvers.
55224b6179bSKris Buschelman   After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU).
55324b6179bSKris Buschelman   This matrix type is only supported for double precision real.
55424b6179bSKris Buschelman 
55524b6179bSKris Buschelman   This matrix inherits from MATSEQAIJ.  As a result, MatSeqAIJSetPreallocation is
55628b08bd3SKris Buschelman   supported for this matrix type.  One can also call MatConvert for an inplace conversion to or from
55728b08bd3SKris Buschelman   the MATSEQAIJ type without data copy.
55824b6179bSKris Buschelman 
55924b6179bSKris Buschelman   Options Database Keys:
5600bad9183SKris Buschelman + -mat_type superlu - sets the matrix type to "superlu" during a call to MatSetFromOptions()
56124b6179bSKris Buschelman - -mat_superlu_ordering <0,1,2,3> - 0: natural ordering,
56224b6179bSKris Buschelman                                     1: MMD applied to A'*A,
56324b6179bSKris Buschelman                                     2: MMD applied to A'+A,
56424b6179bSKris Buschelman                                     3: COLAMD, approximate minimum degree column ordering
56524b6179bSKris Buschelman 
56624b6179bSKris Buschelman    Level: beginner
56724b6179bSKris Buschelman 
56824b6179bSKris Buschelman .seealso: PCLU
56924b6179bSKris Buschelman M*/
57024b6179bSKris Buschelman 
571b0592601SKris Buschelman EXTERN_C_BEGIN
572b0592601SKris Buschelman #undef __FUNCT__
573f0c56d0fSKris Buschelman #define __FUNCT__ "MatCreate_SuperLU"
574f0c56d0fSKris Buschelman int MatCreate_SuperLU(Mat A) {
575b0592601SKris Buschelman   int ierr;
576b0592601SKris Buschelman 
577b0592601SKris Buschelman   PetscFunctionBegin;
5785441df8eSKris Buschelman   /* Change type name before calling MatSetType to force proper construction of SeqAIJ and SUPERLU types */
5795441df8eSKris Buschelman   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU);CHKERRQ(ierr);
580b0592601SKris Buschelman   ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr);
581b0592601SKris Buschelman   ierr = MatConvert_SeqAIJ_SuperLU(A,MATSUPERLU,&A);CHKERRQ(ierr);
58214b396bbSKris Buschelman   PetscFunctionReturn(0);
58314b396bbSKris Buschelman }
58414b396bbSKris Buschelman EXTERN_C_END
585