114b396bbSKris Buschelman /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 214b396bbSKris Buschelman 314b396bbSKris Buschelman /* 4*75af56d4SHong Zhang Provides an interface to the SuperLU 3.0 sparse solver 514b396bbSKris Buschelman */ 614b396bbSKris Buschelman 714b396bbSKris Buschelman #include "src/mat/impls/aij/seq/aij.h" 814b396bbSKris Buschelman 914b396bbSKris Buschelman EXTERN_C_BEGIN 1014b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX) 1114b396bbSKris Buschelman #include "zsp_defs.h" 1214b396bbSKris Buschelman #else 1314b396bbSKris Buschelman #include "dsp_defs.h" 1414b396bbSKris Buschelman #endif 1514b396bbSKris Buschelman #include "util.h" 1614b396bbSKris Buschelman EXTERN_C_END 1714b396bbSKris Buschelman 1814b396bbSKris Buschelman typedef struct { 19*75af56d4SHong Zhang SuperMatrix A,L,U,B,X; 20*75af56d4SHong Zhang superlu_options_t options; 21*75af56d4SHong Zhang int *perm_c; /* column permutation vector */ 22*75af56d4SHong Zhang int *perm_r; /* row permutations from partial pivoting */ 23*75af56d4SHong Zhang int *etree; 24*75af56d4SHong Zhang double *R, *C; 25*75af56d4SHong Zhang char equed[1]; 26*75af56d4SHong Zhang int lwork; 27*75af56d4SHong Zhang void *work; 28*75af56d4SHong Zhang double rpg, rcond; 29*75af56d4SHong Zhang mem_usage_t mem_usage; 30*75af56d4SHong Zhang MatStructure flg; 31*75af56d4SHong Zhang /* 3214b396bbSKris Buschelman SuperMatrix A,B,AC,L,U; 3314b396bbSKris Buschelman int *perm_r,*perm_c,ispec,relax,panel_size; 3414b396bbSKris Buschelman double pivot_threshold; 3514b396bbSKris Buschelman NCformat *store; 3614b396bbSKris Buschelman MatStructure flg; 3714b396bbSKris Buschelman PetscTruth SuperluMatOdering; 38*75af56d4SHong Zhang */ 3914b396bbSKris Buschelman 4014b396bbSKris Buschelman /* A few function pointers for inheritance */ 41f0c56d0fSKris Buschelman int (*MatDuplicate)(Mat,MatDuplicateOption,Mat*); 4214b396bbSKris Buschelman int (*MatView)(Mat,PetscViewer); 4314b396bbSKris Buschelman int (*MatAssemblyEnd)(Mat,MatAssemblyType); 44b0592601SKris Buschelman int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 4514b396bbSKris Buschelman int (*MatDestroy)(Mat); 4614b396bbSKris Buschelman 4714b396bbSKris Buschelman /* Flag to clean up (non-global) SuperLU objects during Destroy */ 4814b396bbSKris Buschelman PetscTruth CleanUpSuperLU; 49f0c56d0fSKris Buschelman } Mat_SuperLU; 5014b396bbSKris Buschelman 5114b396bbSKris Buschelman 52f0c56d0fSKris Buschelman EXTERN int MatFactorInfo_SuperLU(Mat,PetscViewer); 53f0c56d0fSKris Buschelman EXTERN int MatLUFactorSymbolic_SuperLU(Mat,IS,IS,MatFactorInfo*,Mat*); 54b0592601SKris Buschelman 55b0592601SKris Buschelman EXTERN_C_BEGIN 56b0592601SKris Buschelman EXTERN int MatConvert_SuperLU_SeqAIJ(Mat,MatType,Mat*); 57b0592601SKris Buschelman EXTERN int MatConvert_SeqAIJ_SuperLU(Mat,MatType,Mat*); 58b0592601SKris Buschelman EXTERN_C_END 5914b396bbSKris Buschelman 6014b396bbSKris Buschelman #undef __FUNCT__ 61f0c56d0fSKris Buschelman #define __FUNCT__ "MatDestroy_SuperLU" 62f0c56d0fSKris Buschelman int MatDestroy_SuperLU(Mat A) 6314b396bbSKris Buschelman { 64460c4903SKris Buschelman int ierr; 65f0c56d0fSKris Buschelman Mat_SuperLU *lu = (Mat_SuperLU*)A->spptr; 6614b396bbSKris Buschelman 6714b396bbSKris Buschelman PetscFunctionBegin; 68*75af56d4SHong Zhang if (lu->CleanUpSuperLU) { /* Free the SuperLU datastructures */ 69*75af56d4SHong Zhang /* Destroy_CompCol_Matrix(&lu->A); */ /* hangs inside memory.c! */ 70*75af56d4SHong Zhang Destroy_SuperMatrix_Store(&lu->A); 71*75af56d4SHong Zhang Destroy_SuperMatrix_Store(&lu->B); 72*75af56d4SHong Zhang Destroy_SuperMatrix_Store(&lu->X); 73*75af56d4SHong Zhang SUPERLU_FREE (lu->etree); 74*75af56d4SHong Zhang SUPERLU_FREE (lu->perm_r); 75*75af56d4SHong Zhang SUPERLU_FREE (lu->perm_c); 76*75af56d4SHong Zhang SUPERLU_FREE (lu->R); 77*75af56d4SHong Zhang SUPERLU_FREE (lu->C); 78*75af56d4SHong Zhang if ( lu->lwork >= 0 ) { 79fb3e25aaSKris Buschelman Destroy_SuperNode_Matrix(&lu->L); 80fb3e25aaSKris Buschelman Destroy_CompCol_Matrix(&lu->U); 81*75af56d4SHong Zhang } 82fb3e25aaSKris Buschelman } 83b0592601SKris Buschelman ierr = MatConvert_SuperLU_SeqAIJ(A,MATSEQAIJ,&A);CHKERRQ(ierr); 84fb3e25aaSKris Buschelman ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 8514b396bbSKris Buschelman PetscFunctionReturn(0); 8614b396bbSKris Buschelman } 8714b396bbSKris Buschelman 8814b396bbSKris Buschelman #undef __FUNCT__ 89f0c56d0fSKris Buschelman #define __FUNCT__ "MatView_SuperLU" 90f0c56d0fSKris Buschelman int MatView_SuperLU(Mat A,PetscViewer viewer) 9114b396bbSKris Buschelman { 9214b396bbSKris Buschelman int ierr; 9314b396bbSKris Buschelman PetscTruth isascii; 9414b396bbSKris Buschelman PetscViewerFormat format; 95f0c56d0fSKris Buschelman Mat_SuperLU *lu=(Mat_SuperLU*)(A->spptr); 9614b396bbSKris Buschelman 9714b396bbSKris Buschelman PetscFunctionBegin; 9814b396bbSKris Buschelman ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 9914b396bbSKris Buschelman 10014b396bbSKris Buschelman ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 10114b396bbSKris Buschelman if (isascii) { 10214b396bbSKris Buschelman ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 10314b396bbSKris Buschelman if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 104f0c56d0fSKris Buschelman ierr = MatFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr); 10514b396bbSKris Buschelman } 10614b396bbSKris Buschelman } 10714b396bbSKris Buschelman PetscFunctionReturn(0); 10814b396bbSKris Buschelman } 10914b396bbSKris Buschelman 11014b396bbSKris Buschelman #undef __FUNCT__ 111f0c56d0fSKris Buschelman #define __FUNCT__ "MatAssemblyEnd_SuperLU" 112f0c56d0fSKris Buschelman int MatAssemblyEnd_SuperLU(Mat A,MatAssemblyType mode) { 11314b396bbSKris Buschelman int ierr; 114f0c56d0fSKris Buschelman Mat_SuperLU *lu=(Mat_SuperLU*)(A->spptr); 11514b396bbSKris Buschelman 11614b396bbSKris Buschelman PetscFunctionBegin; 11714b396bbSKris Buschelman ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 118b0592601SKris Buschelman 119b0592601SKris Buschelman lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 120f0c56d0fSKris Buschelman A->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU; 12114b396bbSKris Buschelman PetscFunctionReturn(0); 12214b396bbSKris Buschelman } 12314b396bbSKris Buschelman 124*75af56d4SHong Zhang /* This function was written for SuperLU 2.0 by Matthew Knepley. Not tested for SuperLU 3.0! */ 125*75af56d4SHong Zhang #ifdef SuperLU2 12614b396bbSKris Buschelman #include "src/mat/impls/dense/seq/dense.h" 12714b396bbSKris Buschelman #undef __FUNCT__ 128f0c56d0fSKris Buschelman #define __FUNCT__ "MatCreateNull_SuperLU" 129f0c56d0fSKris Buschelman int MatCreateNull_SuperLU(Mat A,Mat *nullMat) 13014b396bbSKris Buschelman { 131f0c56d0fSKris Buschelman Mat_SuperLU *lu = (Mat_SuperLU*)A->spptr; 13214b396bbSKris Buschelman int numRows = A->m,numCols = A->n; 13314b396bbSKris Buschelman SCformat *Lstore; 13414b396bbSKris Buschelman int numNullCols,size; 135*75af56d4SHong Zhang SuperLUStat_t stat; 13614b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX) 13714b396bbSKris Buschelman doublecomplex *nullVals,*workVals; 13814b396bbSKris Buschelman #else 13914b396bbSKris Buschelman PetscScalar *nullVals,*workVals; 14014b396bbSKris Buschelman #endif 14114b396bbSKris Buschelman int row,newRow,col,newCol,block,b,ierr; 14214b396bbSKris Buschelman 14314b396bbSKris Buschelman PetscFunctionBegin; 14414b396bbSKris Buschelman PetscValidHeaderSpecific(A,MAT_COOKIE); 14514b396bbSKris Buschelman PetscValidPointer(nullMat); 14614b396bbSKris Buschelman if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix"); 14714b396bbSKris Buschelman numNullCols = numCols - numRows; 14814b396bbSKris Buschelman if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems"); 14914b396bbSKris Buschelman /* Create the null matrix */ 15014b396bbSKris Buschelman ierr = MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);CHKERRQ(ierr); 15114b396bbSKris Buschelman if (numNullCols == 0) { 15214b396bbSKris Buschelman ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15314b396bbSKris Buschelman ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 15414b396bbSKris Buschelman PetscFunctionReturn(0); 15514b396bbSKris Buschelman } 15614b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX) 15714b396bbSKris Buschelman nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v; 15814b396bbSKris Buschelman #else 15914b396bbSKris Buschelman nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v; 16014b396bbSKris Buschelman #endif 16114b396bbSKris Buschelman /* Copy in the columns */ 16214b396bbSKris Buschelman Lstore = (SCformat*)lu->L.Store; 16314b396bbSKris Buschelman for(block = 0; block <= Lstore->nsuper; block++) { 16414b396bbSKris Buschelman newRow = Lstore->sup_to_col[block]; 16514b396bbSKris Buschelman size = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block]; 16614b396bbSKris Buschelman for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) { 16714b396bbSKris Buschelman newCol = Lstore->rowind[col]; 16814b396bbSKris Buschelman if (newCol >= numRows) { 16914b396bbSKris Buschelman for(b = 0; b < size; b++) 17014b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX) 17114b396bbSKris Buschelman nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 17214b396bbSKris Buschelman #else 17314b396bbSKris Buschelman nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 17414b396bbSKris Buschelman #endif 17514b396bbSKris Buschelman } 17614b396bbSKris Buschelman } 17714b396bbSKris Buschelman } 17814b396bbSKris Buschelman /* Permute rhs to form P^T_c B */ 17914b396bbSKris Buschelman ierr = PetscMalloc(numRows*sizeof(double),&workVals);CHKERRQ(ierr); 18014b396bbSKris Buschelman for(b = 0; b < numNullCols; b++) { 18114b396bbSKris Buschelman for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row]; 18214b396bbSKris Buschelman for(row = 0; row < numRows; row++) nullVals[b*numRows+row] = workVals[row]; 18314b396bbSKris Buschelman } 18414b396bbSKris Buschelman /* Backward solve the upper triangle A x = b */ 18514b396bbSKris Buschelman for(b = 0; b < numNullCols; b++) { 18614b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX) 187*75af56d4SHong Zhang sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&stat,&ierr); 18814b396bbSKris Buschelman #else 189*75af56d4SHong Zhang sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&stat,&ierr); 19014b396bbSKris Buschelman #endif 19114b396bbSKris Buschelman if (ierr < 0) 19214b396bbSKris Buschelman SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr); 19314b396bbSKris Buschelman } 19414b396bbSKris Buschelman ierr = PetscFree(workVals);CHKERRQ(ierr); 19514b396bbSKris Buschelman 19614b396bbSKris Buschelman ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 19714b396bbSKris Buschelman ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 19814b396bbSKris Buschelman PetscFunctionReturn(0); 19914b396bbSKris Buschelman } 200*75af56d4SHong Zhang #endif 20114b396bbSKris Buschelman 20214b396bbSKris Buschelman #undef __FUNCT__ 203f0c56d0fSKris Buschelman #define __FUNCT__ "MatSolve_SuperLU" 204f0c56d0fSKris Buschelman int MatSolve_SuperLU(Mat A,Vec b,Vec x) 20514b396bbSKris Buschelman { 206f0c56d0fSKris Buschelman Mat_SuperLU *lu = (Mat_SuperLU*)A->spptr; 207*75af56d4SHong Zhang PetscScalar *barray,*xarray; 208*75af56d4SHong Zhang int m,n,ierr,lwork,info,i; 209*75af56d4SHong Zhang SuperLUStat_t stat; 210*75af56d4SHong Zhang double ferr,berr,*rhsb,*rhsx; 21114b396bbSKris Buschelman 21214b396bbSKris Buschelman PetscFunctionBegin; 213*75af56d4SHong Zhang /* rhs vector */ 214*75af56d4SHong Zhang lu->B.ncol = 1; /* Set the number of right-hand side */ 215*75af56d4SHong Zhang ierr = VecGetArray(b,&barray);CHKERRQ(ierr); 216*75af56d4SHong Zhang ((DNformat*)lu->B.Store)->nzval = barray; 217*75af56d4SHong Zhang 218*75af56d4SHong Zhang /* solution vector */ 219*75af56d4SHong Zhang ierr = VecGetArray(x,&xarray);CHKERRQ(ierr); 220*75af56d4SHong Zhang ((DNformat*)lu->X.Store)->nzval = xarray; 221*75af56d4SHong Zhang 222*75af56d4SHong Zhang /* Initialize the statistics variables. */ 223*75af56d4SHong Zhang StatInit(&stat); 224*75af56d4SHong Zhang 225*75af56d4SHong Zhang lu->options.Fact = FACTORED; /* Indicate the factored form of A is supplied. */ 226*75af56d4SHong Zhang lu->options.Trans = TRANS; 227*75af56d4SHong Zhang dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 228*75af56d4SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 229*75af56d4SHong Zhang &lu->mem_usage, &stat, &info); 230*75af56d4SHong Zhang 231*75af56d4SHong Zhang ierr = VecRestoreArray(b,&barray);CHKERRQ(ierr); 232*75af56d4SHong Zhang ierr = VecRestoreArray(x,&xarray);CHKERRQ(ierr); 233*75af56d4SHong Zhang 234*75af56d4SHong Zhang if ( info == 0 || info == lu->A.ncol+1 ) { 235*75af56d4SHong Zhang if ( lu->options.IterRefine ) { 236*75af56d4SHong Zhang printf("Iterative Refinement:\n"); 237*75af56d4SHong Zhang printf("%8s%8s%16s%16s\n", "rhs", "Steps", "FERR", "BERR"); 238*75af56d4SHong Zhang for (i = 0; i < 1; ++i) 239*75af56d4SHong Zhang printf("%8d%8d%16e%16e\n", i+1, stat.RefineSteps, ferr, berr); 240*75af56d4SHong Zhang } 241*75af56d4SHong Zhang fflush(stdout); 242*75af56d4SHong Zhang } else if ( info > 0 && lu->lwork == -1 ) { 243*75af56d4SHong Zhang printf("** Estimated memory: %d bytes\n", info - n); 24414b396bbSKris Buschelman } 24514b396bbSKris Buschelman 246*75af56d4SHong Zhang if ( lu->options.PrintStat ) StatPrint(&stat); 247*75af56d4SHong Zhang StatFree(&stat); 248*75af56d4SHong Zhang PetscFunctionReturn(0); 249*75af56d4SHong Zhang } 25014b396bbSKris Buschelman 25114b396bbSKris Buschelman #undef __FUNCT__ 252f0c56d0fSKris Buschelman #define __FUNCT__ "MatLUFactorNumeric_SuperLU" 253f0c56d0fSKris Buschelman int MatLUFactorNumeric_SuperLU(Mat A,Mat *F) 25414b396bbSKris Buschelman { 25514b396bbSKris Buschelman Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(A)->data; 256f0c56d0fSKris Buschelman Mat_SuperLU *lu = (Mat_SuperLU*)(*F)->spptr; 257*75af56d4SHong Zhang int ierr,info; 25814b396bbSKris Buschelman PetscTruth flag; 259*75af56d4SHong Zhang SuperLUStat_t stat; 260*75af56d4SHong Zhang double ferr, berr; 26114b396bbSKris Buschelman 26214b396bbSKris Buschelman PetscFunctionBegin; 263*75af56d4SHong Zhang if (lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numeric factorization */ 264*75af56d4SHong Zhang /* Set SuperLU options */ 265*75af56d4SHong Zhang /* the default values for options argument: 266*75af56d4SHong Zhang options.Fact = DOFACT; 267*75af56d4SHong Zhang options.Equil = YES; 268*75af56d4SHong Zhang options.ColPerm = COLAMD; 269*75af56d4SHong Zhang options.DiagPivotThresh = 1.0; 270*75af56d4SHong Zhang options.Trans = NOTRANS; 271*75af56d4SHong Zhang options.IterRefine = NOREFINE; 272*75af56d4SHong Zhang options.SymmetricMode = NO; 273*75af56d4SHong Zhang options.PivotGrowth = NO; 274*75af56d4SHong Zhang options.ConditionNumber = NO; 275*75af56d4SHong Zhang options.PrintStat = YES; 27614b396bbSKris Buschelman */ 277*75af56d4SHong Zhang set_default_options(&lu->options); 278*75af56d4SHong Zhang lu->options.Equil = NO; /* equilibration causes error in solve */ 279*75af56d4SHong Zhang } else { /* successing numerical factorization */ 280*75af56d4SHong Zhang /* Ref: ~SuperLU_3.0/EXAMPLE/dlinsolx2.c */ 281*75af56d4SHong Zhang Destroy_SuperMatrix_Store(&lu->A); 282*75af56d4SHong Zhang if ( lu->lwork >= 0 ) { /* Deallocate storage associated with L and U. */ 283*75af56d4SHong Zhang Destroy_SuperNode_Matrix(&lu->L); 284*75af56d4SHong Zhang Destroy_CompCol_Matrix(&lu->U); 285*75af56d4SHong Zhang lu->options.Fact = SamePattern; 28614b396bbSKris Buschelman } 287*75af56d4SHong Zhang } 28814b396bbSKris Buschelman 289*75af56d4SHong Zhang /* Create the SuperMatrix for lu->A=A^T: 290*75af56d4SHong Zhang Since SuperLU likes column-oriented matrices,we pass it the transpose, 291*75af56d4SHong Zhang and then solve A^T X = B in MatSolve(). */ 292*75af56d4SHong Zhang #if defined(PETSC_USE_COMPLEX) 293*75af56d4SHong Zhang zCreate_CompCol_Matrix(&lu->A,A->n,A->m,aa->nz,aa->a,aa->j,aa->i, 294*75af56d4SHong Zhang SLU_NC,SLU_Z,SLU_GE); 295*75af56d4SHong Zhang #else 296*75af56d4SHong Zhang dCreate_CompCol_Matrix(&lu->A,A->n,A->m,aa->nz,aa->a,aa->j,aa->i, 297*75af56d4SHong Zhang SLU_NC,SLU_D,SLU_GE); 298*75af56d4SHong Zhang #endif 299*75af56d4SHong Zhang 300*75af56d4SHong Zhang /* Initialize the statistics variables. */ 301*75af56d4SHong Zhang StatInit(&stat); 302*75af56d4SHong Zhang 303*75af56d4SHong Zhang lu->lwork = 0; /* allocate space internally by system malloc */ 304*75af56d4SHong Zhang lu->B.ncol = 0; /* Indicate not to solve the system */ 305*75af56d4SHong Zhang dgssvx(&lu->options, &lu->A, lu->perm_c, lu->perm_r, lu->etree, lu->equed, lu->R, lu->C, 306*75af56d4SHong Zhang &lu->L, &lu->U, lu->work, lu->lwork, &lu->B, &lu->X, &lu->rpg, &lu->rcond, &ferr, &berr, 307*75af56d4SHong Zhang &lu->mem_usage, &stat, &info); 308*75af56d4SHong Zhang 309*75af56d4SHong Zhang if ( info == 0 || info == lu->A.ncol+1 ) { 310*75af56d4SHong Zhang if ( lu->options.PivotGrowth ) printf("Recip. pivot growth = %e\n", lu->rpg); 311*75af56d4SHong Zhang if ( lu->options.ConditionNumber ) 312*75af56d4SHong Zhang printf("Recip. condition number = %e\n", lu->rcond); 313*75af56d4SHong Zhang /* 314*75af56d4SHong Zhang NCformat *Ustore; 315*75af56d4SHong Zhang SCformat *Lstore; 316*75af56d4SHong Zhang Lstore = (SCformat *) lu->L.Store; 317*75af56d4SHong Zhang Ustore = (NCformat *) lu->U.Store; 318*75af56d4SHong Zhang printf("No of nonzeros in factor L = %d\n", Lstore->nnz); 319*75af56d4SHong Zhang printf("No of nonzeros in factor U = %d\n", Ustore->nnz); 320*75af56d4SHong Zhang printf("No of nonzeros in L+U = %d\n", Lstore->nnz + Ustore->nnz - n); 321*75af56d4SHong Zhang printf("L\\U MB %.3f\ttotal MB needed %.3f\texpansions %d\n", 322*75af56d4SHong Zhang mem_usage.for_lu/1e6, mem_usage.total_needed/1e6, 323*75af56d4SHong Zhang mem_usage.expansions); 324*75af56d4SHong Zhang */ 325*75af56d4SHong Zhang fflush(stdout); 326*75af56d4SHong Zhang 327*75af56d4SHong Zhang } else if ( info > 0 && lu->lwork == -1 ) { 328*75af56d4SHong Zhang printf("** Estimated memory: %d bytes\n", info - lu->A.ncol); 329*75af56d4SHong Zhang } 330*75af56d4SHong Zhang 331*75af56d4SHong Zhang if ( lu->options.PrintStat ) StatPrint(&stat); 332*75af56d4SHong Zhang StatFree(&stat); 333*75af56d4SHong Zhang 334*75af56d4SHong Zhang lu->flg = SAME_NONZERO_PATTERN; 335*75af56d4SHong Zhang PetscFunctionReturn(0); 336*75af56d4SHong Zhang 337*75af56d4SHong Zhang #ifdef OLD 33814b396bbSKris Buschelman /* Set SuperLU options */ 33914b396bbSKris Buschelman lu->relax = sp_ienv(2); 34014b396bbSKris Buschelman lu->panel_size = sp_ienv(1); 34114b396bbSKris Buschelman /* We have to initialize global data or SuperLU crashes (sucky design) */ 34214b396bbSKris Buschelman if (!StatInitCalled) { 34314b396bbSKris Buschelman StatInit(lu->panel_size,lu->relax); 34414b396bbSKris Buschelman } 34514b396bbSKris Buschelman StatInitCalled++; 34614b396bbSKris Buschelman 34714b396bbSKris Buschelman ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); 34814b396bbSKris Buschelman /* use SuperLU mat ordeing */ 34914b396bbSKris Buschelman ierr = PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)\n 0: natural ordering;\n 1: MMD applied to A'*A;\n 2: MMD applied to A'+A;\n 3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);CHKERRQ(ierr); 35014b396bbSKris Buschelman if (flag) { 35114b396bbSKris Buschelman get_perm_c(lu->ispec, &lu->A, lu->perm_c); 35214b396bbSKris Buschelman lu->SuperluMatOdering = PETSC_TRUE; 35314b396bbSKris Buschelman } 35414b396bbSKris Buschelman PetscOptionsEnd(); 35514b396bbSKris Buschelman 35614b396bbSKris Buschelman /* Create the elimination tree */ 35714b396bbSKris Buschelman ierr = PetscMalloc(A->n*sizeof(int),&etree);CHKERRQ(ierr); 35814b396bbSKris Buschelman sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC); 35914b396bbSKris Buschelman /* Factor the matrix */ 36014b396bbSKris Buschelman #if defined(PETSC_USE_COMPLEX) 36114b396bbSKris Buschelman zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 36214b396bbSKris Buschelman #else 36314b396bbSKris Buschelman dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 36414b396bbSKris Buschelman #endif 36514b396bbSKris Buschelman if (ierr < 0) { 36614b396bbSKris Buschelman SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 36714b396bbSKris Buschelman } else if (ierr > 0) { 36814b396bbSKris Buschelman if (ierr <= A->m) { 36914b396bbSKris Buschelman SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr); 37014b396bbSKris Buschelman } else { 37114b396bbSKris Buschelman SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m); 37214b396bbSKris Buschelman } 37314b396bbSKris Buschelman } 37414b396bbSKris Buschelman 37514b396bbSKris Buschelman /* Cleanup */ 37614b396bbSKris Buschelman ierr = PetscFree(etree);CHKERRQ(ierr); 377*75af56d4SHong Zhang #endif /* OLD */ 37814b396bbSKris Buschelman 379*75af56d4SHong Zhang 38014b396bbSKris Buschelman } 38114b396bbSKris Buschelman 38214b396bbSKris Buschelman /* 38314b396bbSKris Buschelman Note the r permutation is ignored 38414b396bbSKris Buschelman */ 38514b396bbSKris Buschelman #undef __FUNCT__ 386f0c56d0fSKris Buschelman #define __FUNCT__ "MatLUFactorSymbolic_SuperLU" 387f0c56d0fSKris Buschelman int MatLUFactorSymbolic_SuperLU(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 38814b396bbSKris Buschelman { 38914b396bbSKris Buschelman Mat B; 390f0c56d0fSKris Buschelman Mat_SuperLU *lu; 391*75af56d4SHong Zhang int ierr,m=A->m,n=A->n; /* *ca; */ 39214b396bbSKris Buschelman 39314b396bbSKris Buschelman PetscFunctionBegin; 39414b396bbSKris Buschelman 395899d7b4fSKris Buschelman ierr = MatCreate(A->comm,A->m,A->n,PETSC_DETERMINE,PETSC_DETERMINE,&B);CHKERRQ(ierr); 396899d7b4fSKris Buschelman ierr = MatSetType(B,MATSUPERLU);CHKERRQ(ierr); 397899d7b4fSKris Buschelman ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 398f0c56d0fSKris Buschelman 399f0c56d0fSKris Buschelman B->ops->lufactornumeric = MatLUFactorNumeric_SuperLU; 400f0c56d0fSKris Buschelman B->ops->solve = MatSolve_SuperLU; 40114b396bbSKris Buschelman B->factor = FACTOR_LU; 402899d7b4fSKris Buschelman B->assembled = PETSC_TRUE; /* required by -sles_view */ 40314b396bbSKris Buschelman 404f0c56d0fSKris Buschelman lu = (Mat_SuperLU*)(B->spptr); 405*75af56d4SHong Zhang #ifdef SUPERLU2 4062ecb5974SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatCreateNull","MatCreateNull_SuperLU", 407f0c56d0fSKris Buschelman (void(*)(void))MatCreateNull_SuperLU);CHKERRQ(ierr); 408*75af56d4SHong Zhang #endif 409*75af56d4SHong Zhang #ifdef OLD 41014b396bbSKris Buschelman /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */ 41114b396bbSKris Buschelman ierr = PetscMalloc(A->n*sizeof(int),&lu->perm_r);CHKERRQ(ierr); 41214b396bbSKris Buschelman ierr = PetscMalloc(A->m*sizeof(int),&lu->perm_c);CHKERRQ(ierr); 41314b396bbSKris Buschelman 41414b396bbSKris Buschelman /* use PETSc mat ordering */ 41514b396bbSKris Buschelman ierr = ISGetIndices(c,&ca);CHKERRQ(ierr); 41614b396bbSKris Buschelman ierr = PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));CHKERRQ(ierr); 41714b396bbSKris Buschelman ierr = ISRestoreIndices(c,&ca);CHKERRQ(ierr); 41814b396bbSKris Buschelman lu->SuperluMatOdering = PETSC_FALSE; 41914b396bbSKris Buschelman 42014b396bbSKris Buschelman lu->pivot_threshold = info->dtcol; 421f0c56d0fSKris Buschelman PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SuperLU)); 422*75af56d4SHong Zhang #endif 423*75af56d4SHong Zhang /* Allocate spaces (notice sizes are for the transpose) */ 424*75af56d4SHong Zhang int *perm_c; /* column permutation vector */ 425*75af56d4SHong Zhang int *perm_r; /* row permutations from partial pivoting */ 426*75af56d4SHong Zhang if ( !(lu->etree = intMalloc(m)) ) ABORT("Malloc fails for etree[]."); 427*75af56d4SHong Zhang if ( !(lu->perm_r = intMalloc(n)) ) ABORT("Malloc fails for perm_r[]."); 428*75af56d4SHong Zhang if ( !(lu->perm_c = intMalloc(m)) ) ABORT("Malloc fails for perm_c[]."); 429*75af56d4SHong Zhang if ( !(lu->R = (double *) SUPERLU_MALLOC(n * sizeof(double))) ) 430*75af56d4SHong Zhang ABORT("SUPERLU_MALLOC fails for R[]."); 431*75af56d4SHong Zhang if ( !(lu->C = (double *) SUPERLU_MALLOC(m * sizeof(double))) ) 432*75af56d4SHong Zhang ABORT("SUPERLU_MALLOC fails for C[]."); 433*75af56d4SHong Zhang 434*75af56d4SHong Zhang /* create rhs and solution x without allocate space for Store */ 435*75af56d4SHong Zhang dCreate_Dense_Matrix(&lu->B, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE); 436*75af56d4SHong Zhang dCreate_Dense_Matrix(&lu->X, m, 1, PETSC_NULL, m, SLU_DN, SLU_D, SLU_GE); 43714b396bbSKris Buschelman 43814b396bbSKris Buschelman lu->flg = DIFFERENT_NONZERO_PATTERN; 439e740cb95SKris Buschelman lu->CleanUpSuperLU = PETSC_TRUE; 440899d7b4fSKris Buschelman 441899d7b4fSKris Buschelman *F = B; 44214b396bbSKris Buschelman PetscFunctionReturn(0); 44314b396bbSKris Buschelman } 44414b396bbSKris Buschelman 44514b396bbSKris Buschelman /* used by -sles_view */ 44614b396bbSKris Buschelman #undef __FUNCT__ 447f0c56d0fSKris Buschelman #define __FUNCT__ "MatFactorInfo_SuperLU" 448f0c56d0fSKris Buschelman int MatFactorInfo_SuperLU(Mat A,PetscViewer viewer) 44914b396bbSKris Buschelman { 450f0c56d0fSKris Buschelman Mat_SuperLU *lu= (Mat_SuperLU*)A->spptr; 451f0c56d0fSKris Buschelman int ierr; 452f0c56d0fSKris Buschelman 453f0c56d0fSKris Buschelman PetscFunctionBegin; 454f0c56d0fSKris Buschelman ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr); 455*75af56d4SHong Zhang #ifdef OLD 456f0c56d0fSKris Buschelman if(lu->SuperluMatOdering) { 457f0c56d0fSKris Buschelman ierr = PetscViewerASCIIPrintf(viewer," SuperLU mat ordering: %d\n",lu->ispec);CHKERRQ(ierr); 458f0c56d0fSKris Buschelman } 459*75af56d4SHong Zhang #endif 460f0c56d0fSKris Buschelman PetscFunctionReturn(0); 461f0c56d0fSKris Buschelman } 462f0c56d0fSKris Buschelman 463f0c56d0fSKris Buschelman #undef __FUNCT__ 464f0c56d0fSKris Buschelman #define __FUNCT__ "MatDuplicate_SuperLU" 465f0c56d0fSKris Buschelman int MatDuplicate_SuperLU(Mat A, MatDuplicateOption op, Mat *M) { 46614b396bbSKris Buschelman int ierr; 4678f340917SKris Buschelman Mat_SuperLU *lu=(Mat_SuperLU *)A->spptr; 4688f340917SKris Buschelman 46914b396bbSKris Buschelman PetscFunctionBegin; 4708f340917SKris Buschelman ierr = (*lu->MatDuplicate)(A,op,M);CHKERRQ(ierr); 471f0c56d0fSKris Buschelman ierr = MatConvert_SeqAIJ_SuperLU(*M,MATSUPERLU,M);CHKERRQ(ierr); 4723f953163SKris Buschelman ierr = PetscMemcpy((*M)->spptr,lu,sizeof(Mat_SuperLU));CHKERRQ(ierr); 47314b396bbSKris Buschelman PetscFunctionReturn(0); 47414b396bbSKris Buschelman } 47514b396bbSKris Buschelman 47614b396bbSKris Buschelman EXTERN_C_BEGIN 47714b396bbSKris Buschelman #undef __FUNCT__ 478b0592601SKris Buschelman #define __FUNCT__ "MatConvert_SuperLU_SeqAIJ" 479b0592601SKris Buschelman int MatConvert_SuperLU_SeqAIJ(Mat A,MatType type,Mat *newmat) { 480fb3e25aaSKris Buschelman /* This routine is only called to convert an unfactored PETSc-SuperLU matrix */ 481fb3e25aaSKris Buschelman /* to its base PETSc type, so we will ignore 'MatType type'. */ 48214b396bbSKris Buschelman int ierr; 483b0592601SKris Buschelman Mat B=*newmat; 484f0c56d0fSKris Buschelman Mat_SuperLU *lu=(Mat_SuperLU *)A->spptr; 485b0592601SKris Buschelman 486b0592601SKris Buschelman PetscFunctionBegin; 487b0592601SKris Buschelman if (B != A) { 488b0592601SKris Buschelman ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 489f0c56d0fSKris Buschelman } 490b0592601SKris Buschelman /* Reset the original function pointers */ 491f0c56d0fSKris Buschelman B->ops->duplicate = lu->MatDuplicate; 492b0592601SKris Buschelman B->ops->view = lu->MatView; 493b0592601SKris Buschelman B->ops->assemblyend = lu->MatAssemblyEnd; 494b0592601SKris Buschelman B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 495b0592601SKris Buschelman B->ops->destroy = lu->MatDestroy; 496b0592601SKris Buschelman /* lu is only a function pointer stash unless we've factored the matrix, which we haven't! */ 497b0592601SKris Buschelman ierr = PetscFree(lu);CHKERRQ(ierr); 498b0592601SKris Buschelman ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);CHKERRQ(ierr); 499b0592601SKris Buschelman *newmat = B; 500b0592601SKris Buschelman PetscFunctionReturn(0); 501b0592601SKris Buschelman } 502b0592601SKris Buschelman EXTERN_C_END 503b0592601SKris Buschelman 504b0592601SKris Buschelman EXTERN_C_BEGIN 505b0592601SKris Buschelman #undef __FUNCT__ 506b0592601SKris Buschelman #define __FUNCT__ "MatConvert_SeqAIJ_SuperLU" 507b0592601SKris Buschelman int MatConvert_SeqAIJ_SuperLU(Mat A,MatType type,Mat *newmat) { 508fb3e25aaSKris Buschelman /* This routine is only called to convert to MATSUPERLU */ 509fb3e25aaSKris Buschelman /* from MATSEQAIJ, so we will ignore 'MatType type'. */ 510b0592601SKris Buschelman int ierr; 511b0592601SKris Buschelman Mat B=*newmat; 512f0c56d0fSKris Buschelman Mat_SuperLU *lu; 51314b396bbSKris Buschelman 51414b396bbSKris Buschelman PetscFunctionBegin; 515b0592601SKris Buschelman if (B != A) { 516b0592601SKris Buschelman ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 517b0592601SKris Buschelman } 51814b396bbSKris Buschelman 519f0c56d0fSKris Buschelman ierr = PetscNew(Mat_SuperLU,&lu);CHKERRQ(ierr); 520f0c56d0fSKris Buschelman lu->MatDuplicate = A->ops->duplicate; 52114b396bbSKris Buschelman lu->MatView = A->ops->view; 52214b396bbSKris Buschelman lu->MatAssemblyEnd = A->ops->assemblyend; 523b0592601SKris Buschelman lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 52414b396bbSKris Buschelman lu->MatDestroy = A->ops->destroy; 52514b396bbSKris Buschelman lu->CleanUpSuperLU = PETSC_FALSE; 526b0592601SKris Buschelman 527b0592601SKris Buschelman B->spptr = (void*)lu; 528f0c56d0fSKris Buschelman B->ops->duplicate = MatDuplicate_SuperLU; 529f0c56d0fSKris Buschelman B->ops->view = MatView_SuperLU; 530f0c56d0fSKris Buschelman B->ops->assemblyend = MatAssemblyEnd_SuperLU; 531f0c56d0fSKris Buschelman B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU; 532f0c56d0fSKris Buschelman B->ops->destroy = MatDestroy_SuperLU; 533b0592601SKris Buschelman 534b0592601SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_C", 535b0592601SKris Buschelman "MatConvert_SeqAIJ_SuperLU",MatConvert_SeqAIJ_SuperLU);CHKERRQ(ierr); 536b0592601SKris Buschelman ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_seqaij_C", 537b0592601SKris Buschelman "MatConvert_SuperLU_SeqAIJ",MatConvert_SuperLU_SeqAIJ);CHKERRQ(ierr); 538fb3e25aaSKris Buschelman PetscLogInfo(0,"Using SuperLU for SeqAIJ LU factorization and solves."); 539fb3e25aaSKris Buschelman ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU);CHKERRQ(ierr); 540b0592601SKris Buschelman *newmat = B; 541b0592601SKris Buschelman PetscFunctionReturn(0); 542b0592601SKris Buschelman } 543b0592601SKris Buschelman EXTERN_C_END 544b0592601SKris Buschelman 54524b6179bSKris Buschelman /*MC 546fafad747SKris Buschelman MATSUPERLU - MATSUPERLU = "superlu" - A matrix type providing direct solvers (LU) for sequential matrices 54724b6179bSKris Buschelman via the external package SuperLU. 54824b6179bSKris Buschelman 54924b6179bSKris Buschelman If SuperLU is installed (see the manual for 55024b6179bSKris Buschelman instructions on how to declare the existence of external packages), 55124b6179bSKris Buschelman a matrix type can be constructed which invokes SuperLU solvers. 55224b6179bSKris Buschelman After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU). 55324b6179bSKris Buschelman This matrix type is only supported for double precision real. 55424b6179bSKris Buschelman 55524b6179bSKris Buschelman This matrix inherits from MATSEQAIJ. As a result, MatSeqAIJSetPreallocation is 55628b08bd3SKris Buschelman supported for this matrix type. One can also call MatConvert for an inplace conversion to or from 55728b08bd3SKris Buschelman the MATSEQAIJ type without data copy. 55824b6179bSKris Buschelman 55924b6179bSKris Buschelman Options Database Keys: 5600bad9183SKris Buschelman + -mat_type superlu - sets the matrix type to "superlu" during a call to MatSetFromOptions() 56124b6179bSKris Buschelman - -mat_superlu_ordering <0,1,2,3> - 0: natural ordering, 56224b6179bSKris Buschelman 1: MMD applied to A'*A, 56324b6179bSKris Buschelman 2: MMD applied to A'+A, 56424b6179bSKris Buschelman 3: COLAMD, approximate minimum degree column ordering 56524b6179bSKris Buschelman 56624b6179bSKris Buschelman Level: beginner 56724b6179bSKris Buschelman 56824b6179bSKris Buschelman .seealso: PCLU 56924b6179bSKris Buschelman M*/ 57024b6179bSKris Buschelman 571b0592601SKris Buschelman EXTERN_C_BEGIN 572b0592601SKris Buschelman #undef __FUNCT__ 573f0c56d0fSKris Buschelman #define __FUNCT__ "MatCreate_SuperLU" 574f0c56d0fSKris Buschelman int MatCreate_SuperLU(Mat A) { 575b0592601SKris Buschelman int ierr; 576b0592601SKris Buschelman 577b0592601SKris Buschelman PetscFunctionBegin; 5785441df8eSKris Buschelman /* Change type name before calling MatSetType to force proper construction of SeqAIJ and SUPERLU types */ 5795441df8eSKris Buschelman ierr = PetscObjectChangeTypeName((PetscObject)A,MATSUPERLU);CHKERRQ(ierr); 580b0592601SKris Buschelman ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 581b0592601SKris Buschelman ierr = MatConvert_SeqAIJ_SuperLU(A,MATSUPERLU,&A);CHKERRQ(ierr); 58214b396bbSKris Buschelman PetscFunctionReturn(0); 58314b396bbSKris Buschelman } 58414b396bbSKris Buschelman EXTERN_C_END 585