1 /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 3 /* 4 Provides an interface to the SuperLU sparse solver 5 Modified for SuperLU 2.0 by Matthew Knepley 6 */ 7 8 #include "src/mat/impls/aij/seq/aij.h" 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #include "zsp_defs.h" 13 #else 14 #include "dsp_defs.h" 15 #endif 16 #include "util.h" 17 EXTERN_C_END 18 19 typedef struct { 20 SuperMatrix A,B,AC,L,U; 21 int *perm_r,*perm_c,ispec,relax,panel_size; 22 double pivot_threshold; 23 NCformat *store; 24 MatStructure flg; 25 PetscTruth SuperluMatOdering; 26 27 /* A few function pointers for inheritance */ 28 int (*MatView)(Mat,PetscViewer); 29 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 30 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 31 int (*MatDestroy)(Mat); 32 33 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 34 PetscTruth CleanUpSuperLU; 35 } Mat_SeqAIJ_SuperLU; 36 37 38 EXTERN int MatSeqAIJFactorInfo_SuperLU(Mat,PetscViewer); 39 EXTERN int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat,IS,IS,MatFactorInfo*,Mat*); 40 41 EXTERN_C_BEGIN 42 EXTERN int MatConvert_SuperLU_SeqAIJ(Mat,MatType,Mat*); 43 EXTERN int MatConvert_SeqAIJ_SuperLU(Mat,MatType,Mat*); 44 EXTERN_C_END 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatDestroy_SeqAIJ_SuperLU" 48 int MatDestroy_SeqAIJ_SuperLU(Mat A) 49 { 50 int ierr; 51 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 52 53 PetscFunctionBegin; 54 if (lu->CleanUpSuperLU) { 55 /* We have to free the global data or SuperLU crashes (sucky design)*/ 56 /* Since we don't know if more solves on other matrices may be done 57 we cannot free the yucky SuperLU global data 58 StatFree(); 59 */ 60 61 /* Free the SuperLU datastructures */ 62 Destroy_CompCol_Permuted(&lu->AC); 63 Destroy_SuperNode_Matrix(&lu->L); 64 Destroy_CompCol_Matrix(&lu->U); 65 ierr = PetscFree(lu->B.Store);CHKERRQ(ierr); 66 ierr = PetscFree(lu->A.Store);CHKERRQ(ierr); 67 ierr = PetscFree(lu->perm_r);CHKERRQ(ierr); 68 ierr = PetscFree(lu->perm_c);CHKERRQ(ierr); 69 } 70 ierr = MatConvert_SuperLU_SeqAIJ(A,MATSEQAIJ,&A);CHKERRQ(ierr); 71 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatView_SeqAIJ_Spooles" 77 int MatView_SeqAIJ_SuperLU(Mat A,PetscViewer viewer) 78 { 79 int ierr; 80 PetscTruth isascii; 81 PetscViewerFormat format; 82 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 83 84 PetscFunctionBegin; 85 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 86 87 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 88 if (isascii) { 89 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 90 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 91 ierr = MatSeqAIJFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr); 92 } 93 } 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "MatAssemblyEnd_SeqAIJ_SuperLU" 99 int MatAssemblyEnd_SeqAIJ_SuperLU(Mat A,MatAssemblyType mode) { 100 int ierr; 101 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 102 103 PetscFunctionBegin; 104 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 105 106 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 107 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU; 108 PetscFunctionReturn(0); 109 } 110 111 #include "src/mat/impls/dense/seq/dense.h" 112 #undef __FUNCT__ 113 #define __FUNCT__ "MatCreateNull_SeqAIJ_SuperLU" 114 int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat) 115 { 116 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 117 int numRows = A->m,numCols = A->n; 118 SCformat *Lstore; 119 int numNullCols,size; 120 #if defined(PETSC_USE_COMPLEX) 121 doublecomplex *nullVals,*workVals; 122 #else 123 PetscScalar *nullVals,*workVals; 124 #endif 125 int row,newRow,col,newCol,block,b,ierr; 126 127 PetscFunctionBegin; 128 PetscValidHeaderSpecific(A,MAT_COOKIE); 129 PetscValidPointer(nullMat); 130 if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix"); 131 numNullCols = numCols - numRows; 132 if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems"); 133 /* Create the null matrix */ 134 ierr = MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);CHKERRQ(ierr); 135 if (numNullCols == 0) { 136 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 137 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 #if defined(PETSC_USE_COMPLEX) 141 nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v; 142 #else 143 nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v; 144 #endif 145 /* Copy in the columns */ 146 Lstore = (SCformat*)lu->L.Store; 147 for(block = 0; block <= Lstore->nsuper; block++) { 148 newRow = Lstore->sup_to_col[block]; 149 size = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block]; 150 for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) { 151 newCol = Lstore->rowind[col]; 152 if (newCol >= numRows) { 153 for(b = 0; b < size; b++) 154 #if defined(PETSC_USE_COMPLEX) 155 nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 156 #else 157 nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 158 #endif 159 } 160 } 161 } 162 /* Permute rhs to form P^T_c B */ 163 ierr = PetscMalloc(numRows*sizeof(double),&workVals);CHKERRQ(ierr); 164 for(b = 0; b < numNullCols; b++) { 165 for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row]; 166 for(row = 0; row < numRows; row++) nullVals[b*numRows+row] = workVals[row]; 167 } 168 /* Backward solve the upper triangle A x = b */ 169 for(b = 0; b < numNullCols; b++) { 170 #if defined(PETSC_USE_COMPLEX) 171 sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 172 #else 173 sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 174 #endif 175 if (ierr < 0) 176 SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr); 177 } 178 ierr = PetscFree(workVals);CHKERRQ(ierr); 179 180 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 181 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSolve_SeqAIJ_SuperLU" 187 int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x) 188 { 189 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 190 PetscScalar *array; 191 int m,ierr; 192 193 PetscFunctionBegin; 194 ierr = VecGetLocalSize(b,&m);CHKERRQ(ierr); 195 ierr = VecCopy(b,x);CHKERRQ(ierr); 196 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 197 /* Create the Rhs */ 198 lu->B.Stype = SLU_DN; 199 lu->B.Mtype = SLU_GE; 200 lu->B.nrow = m; 201 lu->B.ncol = 1; 202 ((DNformat*)lu->B.Store)->lda = m; 203 ((DNformat*)lu->B.Store)->nzval = array; 204 #if defined(PETSC_USE_COMPLEX) 205 lu->B.Dtype = SLU_Z; 206 zgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 207 #else 208 lu->B.Dtype = SLU_D; 209 dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 210 #endif 211 if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 212 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 216 static int StatInitCalled = 0; 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_SuperLU" 220 int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F) 221 { 222 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(A)->data; 223 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr; 224 int *etree,ierr; 225 PetscTruth flag; 226 227 PetscFunctionBegin; 228 /* Create the SuperMatrix for A^T: 229 Since SuperLU only likes column-oriented matrices,we pass it the transpose, 230 and then solve A^T X = B in MatSolve(). 231 */ 232 233 if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */ 234 lu->A.Stype = SLU_NC; 235 #if defined(PETSC_USE_COMPLEX) 236 lu->A.Dtype = SLU_Z; 237 #else 238 lu->A.Dtype = SLU_D; 239 #endif 240 lu->A.Mtype = SLU_GE; 241 lu->A.nrow = A->n; 242 lu->A.ncol = A->m; 243 244 ierr = PetscMalloc(sizeof(NCformat),&lu->store);CHKERRQ(ierr); 245 ierr = PetscMalloc(sizeof(DNformat),&lu->B.Store);CHKERRQ(ierr); 246 } 247 lu->store->nnz = aa->nz; 248 lu->store->colptr = aa->i; 249 lu->store->rowind = aa->j; 250 lu->store->nzval = aa->a; 251 lu->A.Store = lu->store; 252 253 /* Set SuperLU options */ 254 lu->relax = sp_ienv(2); 255 lu->panel_size = sp_ienv(1); 256 /* We have to initialize global data or SuperLU crashes (sucky design) */ 257 if (!StatInitCalled) { 258 StatInit(lu->panel_size,lu->relax); 259 } 260 StatInitCalled++; 261 262 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); 263 /* use SuperLU mat ordeing */ 264 ierr = PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)\n 0: natural ordering;\n 1: MMD applied to A'*A;\n 2: MMD applied to A'+A;\n 3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);CHKERRQ(ierr); 265 if (flag) { 266 get_perm_c(lu->ispec, &lu->A, lu->perm_c); 267 lu->SuperluMatOdering = PETSC_TRUE; 268 } 269 PetscOptionsEnd(); 270 271 /* Create the elimination tree */ 272 ierr = PetscMalloc(A->n*sizeof(int),&etree);CHKERRQ(ierr); 273 sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC); 274 /* Factor the matrix */ 275 #if defined(PETSC_USE_COMPLEX) 276 zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 277 #else 278 dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 279 #endif 280 if (ierr < 0) { 281 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 282 } else if (ierr > 0) { 283 if (ierr <= A->m) { 284 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr); 285 } else { 286 SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m); 287 } 288 } 289 290 /* Cleanup */ 291 ierr = PetscFree(etree);CHKERRQ(ierr); 292 293 lu->flg = SAME_NONZERO_PATTERN; 294 PetscFunctionReturn(0); 295 } 296 297 /* 298 Note the r permutation is ignored 299 */ 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_SuperLU" 302 int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 303 { 304 Mat B; 305 Mat_SeqAIJ_SuperLU *lu; 306 int ierr,*ca; 307 308 PetscFunctionBegin; 309 310 ierr = MatCreate(A->comm,A->m,A->n,PETSC_DETERMINE,PETSC_DETERMINE,&B);CHKERRQ(ierr); 311 ierr = MatSetType(B,MATSUPERLU);CHKERRQ(ierr); 312 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 313 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU; 314 B->ops->solve = MatSolve_SeqAIJ_SuperLU; 315 B->factor = FACTOR_LU; 316 B->assembled = PETSC_TRUE; /* required by -sles_view */ 317 318 lu = (Mat_SeqAIJ_SuperLU*)(B->spptr); 319 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU", 320 (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);CHKERRQ(ierr); 321 322 /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */ 323 ierr = PetscMalloc(A->n*sizeof(int),&lu->perm_r);CHKERRQ(ierr); 324 ierr = PetscMalloc(A->m*sizeof(int),&lu->perm_c);CHKERRQ(ierr); 325 326 /* use PETSc mat ordering */ 327 ierr = ISGetIndices(c,&ca);CHKERRQ(ierr); 328 ierr = PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));CHKERRQ(ierr); 329 ierr = ISRestoreIndices(c,&ca);CHKERRQ(ierr); 330 lu->SuperluMatOdering = PETSC_FALSE; 331 332 lu->pivot_threshold = info->dtcol; 333 PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU)); 334 335 lu->flg = DIFFERENT_NONZERO_PATTERN; 336 lu->CleanUpSuperLU = PETSC_TRUE; 337 338 *F = B; 339 PetscFunctionReturn(0); 340 } 341 342 /* used by -sles_view */ 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatSeqAIJFactorInfo_SuperLU" 345 int MatSeqAIJFactorInfo_SuperLU(Mat A,PetscViewer viewer) 346 { 347 Mat_SeqAIJ_SuperLU *lu= (Mat_SeqAIJ_SuperLU*)A->spptr; 348 int ierr; 349 PetscFunctionBegin; 350 351 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr); 352 if(lu->SuperluMatOdering) ierr = PetscViewerASCIIPrintf(viewer," SuperLU mat ordering: %d\n",lu->ispec);CHKERRQ(ierr); 353 354 PetscFunctionReturn(0); 355 } 356 357 EXTERN_C_BEGIN 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatConvert_SuperLU_SeqAIJ" 360 int MatConvert_SuperLU_SeqAIJ(Mat A,MatType type,Mat *newmat) { 361 /* This routine is only called to convert an unfactored PETSc-SuperLU matrix */ 362 /* to its base PETSc type, so we will ignore 'MatType type'. */ 363 int ierr; 364 Mat B=*newmat; 365 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU *)A->spptr; 366 367 PetscFunctionBegin; 368 if (B != A) { 369 /* This routine was inherited from SeqAIJ. */ 370 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 371 } else { 372 /* Reset the original function pointers */ 373 B->ops->view = lu->MatView; 374 B->ops->assemblyend = lu->MatAssemblyEnd; 375 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 376 B->ops->destroy = lu->MatDestroy; 377 /* lu is only a function pointer stash unless we've factored the matrix, which we haven't! */ 378 ierr = PetscFree(lu);CHKERRQ(ierr); 379 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);CHKERRQ(ierr); 380 } 381 *newmat = B; 382 PetscFunctionReturn(0); 383 } 384 EXTERN_C_END 385 386 EXTERN_C_BEGIN 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatConvert_SeqAIJ_SuperLU" 389 int MatConvert_SeqAIJ_SuperLU(Mat A,MatType type,Mat *newmat) { 390 /* This routine is only called to convert to MATSUPERLU */ 391 /* from MATSEQAIJ, so we will ignore 'MatType type'. */ 392 int ierr; 393 Mat B=*newmat; 394 Mat_SeqAIJ_SuperLU *lu; 395 396 PetscFunctionBegin; 397 if (B != A) { 398 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 399 } 400 401 ierr = PetscNew(Mat_SeqAIJ_SuperLU,&lu);CHKERRQ(ierr); 402 lu->MatView = A->ops->view; 403 lu->MatAssemblyEnd = A->ops->assemblyend; 404 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 405 lu->MatDestroy = A->ops->destroy; 406 lu->CleanUpSuperLU = PETSC_FALSE; 407 408 B->spptr = (void*)lu; 409 B->ops->view = MatView_SeqAIJ_SuperLU; 410 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ_SuperLU; 411 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU; 412 B->ops->destroy = MatDestroy_SeqAIJ_SuperLU; 413 414 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_C", 415 "MatConvert_SeqAIJ_SuperLU",MatConvert_SeqAIJ_SuperLU);CHKERRQ(ierr); 416 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_seqaij_C", 417 "MatConvert_SuperLU_SeqAIJ",MatConvert_SuperLU_SeqAIJ);CHKERRQ(ierr); 418 PetscLogInfo(0,"Using SuperLU for SeqAIJ LU factorization and solves."); 419 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU);CHKERRQ(ierr); 420 *newmat = B; 421 PetscFunctionReturn(0); 422 } 423 EXTERN_C_END 424 425 /*MC 426 MATSUPERLU - a matrix type providing direct solvers (LU) for sequential matrices 427 via the external package SuperLU. 428 429 If SuperLU is installed (see the manual for 430 instructions on how to declare the existence of external packages), 431 a matrix type can be constructed which invokes SuperLU solvers. 432 After calling MatCreate(...,A), simply call MatSetType(A,MATSUPERLU). 433 This matrix type is only supported for double precision real. 434 435 This matrix inherits from MATSEQAIJ. As a result, MatSeqAIJSetPreallocation is 436 supported for this matrix type. 437 438 Options Database Keys: 439 + -mat_type superlu 440 - -mat_superlu_ordering <0,1,2,3> - 0: natural ordering, 441 1: MMD applied to A'*A, 442 2: MMD applied to A'+A, 443 3: COLAMD, approximate minimum degree column ordering 444 445 Level: beginner 446 447 .seealso: PCLU 448 M*/ 449 450 EXTERN_C_BEGIN 451 #undef __FUNCT__ 452 #define __FUNCT__ "MatCreate_SeqAIJ_SuperLU" 453 int MatCreate_SeqAIJ_SuperLU(Mat A) { 454 int ierr; 455 456 PetscFunctionBegin; 457 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 458 ierr = MatConvert_SeqAIJ_SuperLU(A,MATSUPERLU,&A);CHKERRQ(ierr); 459 PetscFunctionReturn(0); 460 } 461 EXTERN_C_END 462