1 /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 3 /* 4 Provides an interface to the SuperLU sparse solver 5 Modified for SuperLU 2.0 by Matthew Knepley 6 */ 7 8 #include "src/mat/impls/aij/seq/aij.h" 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #include "zsp_defs.h" 13 #else 14 #include "dsp_defs.h" 15 #endif 16 #include "util.h" 17 EXTERN_C_END 18 19 typedef struct { 20 SuperMatrix A,B,AC,L,U; 21 int *perm_r,*perm_c,ispec,relax,panel_size; 22 double pivot_threshold; 23 NCformat *store; 24 MatStructure flg; 25 PetscTruth SuperluMatOdering; 26 27 /* A few function pointers for inheritance */ 28 int (*MatView)(Mat,PetscViewer); 29 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 30 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 31 int (*MatDestroy)(Mat); 32 33 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 34 PetscTruth CleanUpSuperLU; 35 } Mat_SeqAIJ_SuperLU; 36 37 38 EXTERN int MatSeqAIJFactorInfo_SuperLU(Mat,PetscViewer); 39 EXTERN int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat,IS,IS,MatFactorInfo*,Mat*); 40 41 EXTERN_C_BEGIN 42 EXTERN int MatConvert_SuperLU_SeqAIJ(Mat,MatType,Mat*); 43 EXTERN int MatConvert_SeqAIJ_SuperLU(Mat,MatType,Mat*); 44 EXTERN_C_END 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatDestroy_SeqAIJ_SuperLU" 48 int MatDestroy_SeqAIJ_SuperLU(Mat A) 49 { 50 int ierr; 51 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 52 53 PetscFunctionBegin; 54 if (lu->CleanUpSuperLU) { 55 /* We have to free the global data or SuperLU crashes (sucky design)*/ 56 /* Since we don't know if more solves on other matrices may be done 57 we cannot free the yucky SuperLU global data 58 StatFree(); 59 */ 60 61 /* Free the SuperLU datastructures */ 62 Destroy_CompCol_Permuted(&lu->AC); 63 Destroy_SuperNode_Matrix(&lu->L); 64 Destroy_CompCol_Matrix(&lu->U); 65 ierr = PetscFree(lu->B.Store);CHKERRQ(ierr); 66 ierr = PetscFree(lu->A.Store);CHKERRQ(ierr); 67 ierr = PetscFree(lu->perm_r);CHKERRQ(ierr); 68 ierr = PetscFree(lu->perm_c);CHKERRQ(ierr); 69 } 70 ierr = MatConvert_SuperLU_SeqAIJ(A,MATSEQAIJ,&A);CHKERRQ(ierr); 71 ierr = (*A->ops->destroy)(A);CHKERRQ(ierr); 72 PetscFunctionReturn(0); 73 } 74 75 #undef __FUNCT__ 76 #define __FUNCT__ "MatView_SeqAIJ_Spooles" 77 int MatView_SeqAIJ_SuperLU(Mat A,PetscViewer viewer) 78 { 79 int ierr; 80 PetscTruth isascii; 81 PetscViewerFormat format; 82 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 83 84 PetscFunctionBegin; 85 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 86 87 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 88 if (isascii) { 89 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 90 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 91 ierr = MatSeqAIJFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr); 92 } 93 } 94 PetscFunctionReturn(0); 95 } 96 97 #undef __FUNCT__ 98 #define __FUNCT__ "MatAssemblyEnd_SeqAIJ_SuperLU" 99 int MatAssemblyEnd_SeqAIJ_SuperLU(Mat A,MatAssemblyType mode) { 100 int ierr; 101 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 102 103 PetscFunctionBegin; 104 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 105 106 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 107 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU; 108 PetscFunctionReturn(0); 109 } 110 111 #include "src/mat/impls/dense/seq/dense.h" 112 #undef __FUNCT__ 113 #define __FUNCT__ "MatCreateNull_SeqAIJ_SuperLU" 114 int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat) 115 { 116 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 117 int numRows = A->m,numCols = A->n; 118 SCformat *Lstore; 119 int numNullCols,size; 120 #if defined(PETSC_USE_COMPLEX) 121 doublecomplex *nullVals,*workVals; 122 #else 123 PetscScalar *nullVals,*workVals; 124 #endif 125 int row,newRow,col,newCol,block,b,ierr; 126 127 PetscFunctionBegin; 128 PetscValidHeaderSpecific(A,MAT_COOKIE); 129 PetscValidPointer(nullMat); 130 if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix"); 131 numNullCols = numCols - numRows; 132 if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems"); 133 /* Create the null matrix */ 134 ierr = MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);CHKERRQ(ierr); 135 if (numNullCols == 0) { 136 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 137 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 138 PetscFunctionReturn(0); 139 } 140 #if defined(PETSC_USE_COMPLEX) 141 nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v; 142 #else 143 nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v; 144 #endif 145 /* Copy in the columns */ 146 Lstore = (SCformat*)lu->L.Store; 147 for(block = 0; block <= Lstore->nsuper; block++) { 148 newRow = Lstore->sup_to_col[block]; 149 size = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block]; 150 for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) { 151 newCol = Lstore->rowind[col]; 152 if (newCol >= numRows) { 153 for(b = 0; b < size; b++) 154 #if defined(PETSC_USE_COMPLEX) 155 nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 156 #else 157 nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 158 #endif 159 } 160 } 161 } 162 /* Permute rhs to form P^T_c B */ 163 ierr = PetscMalloc(numRows*sizeof(double),&workVals);CHKERRQ(ierr); 164 for(b = 0; b < numNullCols; b++) { 165 for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row]; 166 for(row = 0; row < numRows; row++) nullVals[b*numRows+row] = workVals[row]; 167 } 168 /* Backward solve the upper triangle A x = b */ 169 for(b = 0; b < numNullCols; b++) { 170 #if defined(PETSC_USE_COMPLEX) 171 sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 172 #else 173 sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 174 #endif 175 if (ierr < 0) 176 SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr); 177 } 178 ierr = PetscFree(workVals);CHKERRQ(ierr); 179 180 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 181 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 182 PetscFunctionReturn(0); 183 } 184 185 #undef __FUNCT__ 186 #define __FUNCT__ "MatSolve_SeqAIJ_SuperLU" 187 int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x) 188 { 189 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 190 PetscScalar *array; 191 int m,ierr; 192 193 PetscFunctionBegin; 194 ierr = VecGetLocalSize(b,&m);CHKERRQ(ierr); 195 ierr = VecCopy(b,x);CHKERRQ(ierr); 196 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 197 /* Create the Rhs */ 198 lu->B.Stype = SLU_DN; 199 lu->B.Mtype = SLU_GE; 200 lu->B.nrow = m; 201 lu->B.ncol = 1; 202 ((DNformat*)lu->B.Store)->lda = m; 203 ((DNformat*)lu->B.Store)->nzval = array; 204 #if defined(PETSC_USE_COMPLEX) 205 lu->B.Dtype = SLU_Z; 206 zgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 207 #else 208 lu->B.Dtype = SLU_D; 209 dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 210 #endif 211 if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 212 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 213 PetscFunctionReturn(0); 214 } 215 216 static int StatInitCalled = 0; 217 218 #undef __FUNCT__ 219 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_SuperLU" 220 int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F) 221 { 222 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(A)->data; 223 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr; 224 int *etree,ierr; 225 PetscTruth flag; 226 227 PetscFunctionBegin; 228 /* Create the SuperMatrix for A^T: 229 Since SuperLU only likes column-oriented matrices,we pass it the transpose, 230 and then solve A^T X = B in MatSolve(). 231 */ 232 233 if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */ 234 lu->A.Stype = SLU_NC; 235 #if defined(PETSC_USE_COMPLEX) 236 lu->A.Dtype = SLU_Z; 237 #else 238 lu->A.Dtype = SLU_D; 239 #endif 240 lu->A.Mtype = SLU_GE; 241 lu->A.nrow = A->n; 242 lu->A.ncol = A->m; 243 244 ierr = PetscMalloc(sizeof(NCformat),&lu->store);CHKERRQ(ierr); 245 ierr = PetscMalloc(sizeof(DNformat),&lu->B.Store);CHKERRQ(ierr); 246 } 247 lu->store->nnz = aa->nz; 248 lu->store->colptr = aa->i; 249 lu->store->rowind = aa->j; 250 lu->store->nzval = aa->a; 251 lu->A.Store = lu->store; 252 253 /* Set SuperLU options */ 254 lu->relax = sp_ienv(2); 255 lu->panel_size = sp_ienv(1); 256 /* We have to initialize global data or SuperLU crashes (sucky design) */ 257 if (!StatInitCalled) { 258 StatInit(lu->panel_size,lu->relax); 259 } 260 StatInitCalled++; 261 262 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); 263 /* use SuperLU mat ordeing */ 264 ierr = PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)\n 0: natural ordering;\n 1: MMD applied to A'*A;\n 2: MMD applied to A'+A;\n 3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);CHKERRQ(ierr); 265 if (flag) { 266 get_perm_c(lu->ispec, &lu->A, lu->perm_c); 267 lu->SuperluMatOdering = PETSC_TRUE; 268 } 269 PetscOptionsEnd(); 270 271 /* Create the elimination tree */ 272 ierr = PetscMalloc(A->n*sizeof(int),&etree);CHKERRQ(ierr); 273 sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC); 274 /* Factor the matrix */ 275 #if defined(PETSC_USE_COMPLEX) 276 zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 277 #else 278 dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 279 #endif 280 if (ierr < 0) { 281 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 282 } else if (ierr > 0) { 283 if (ierr <= A->m) { 284 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr); 285 } else { 286 SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m); 287 } 288 } 289 290 /* Cleanup */ 291 ierr = PetscFree(etree);CHKERRQ(ierr); 292 293 lu->flg = SAME_NONZERO_PATTERN; 294 PetscFunctionReturn(0); 295 } 296 297 /* 298 Note the r permutation is ignored 299 */ 300 #undef __FUNCT__ 301 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_SuperLU" 302 int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 303 { 304 Mat B; 305 Mat_SeqAIJ_SuperLU *lu; 306 int ierr,*ca; 307 308 PetscFunctionBegin; 309 310 ierr = MatCreate(A->comm,A->m,A->n,PETSC_DETERMINE,PETSC_DETERMINE,&B);CHKERRQ(ierr); 311 ierr = MatSetType(B,MATSUPERLU);CHKERRQ(ierr); 312 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 313 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU; 314 B->ops->solve = MatSolve_SeqAIJ_SuperLU; 315 B->factor = FACTOR_LU; 316 B->assembled = PETSC_TRUE; /* required by -sles_view */ 317 318 lu = (Mat_SeqAIJ_SuperLU*)(B->spptr); 319 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU", 320 (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);CHKERRQ(ierr); 321 322 /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */ 323 ierr = PetscMalloc(A->n*sizeof(int),&lu->perm_r);CHKERRQ(ierr); 324 ierr = PetscMalloc(A->m*sizeof(int),&lu->perm_c);CHKERRQ(ierr); 325 326 /* use PETSc mat ordering */ 327 ierr = ISGetIndices(c,&ca);CHKERRQ(ierr); 328 ierr = PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));CHKERRQ(ierr); 329 ierr = ISRestoreIndices(c,&ca);CHKERRQ(ierr); 330 lu->SuperluMatOdering = PETSC_FALSE; 331 332 lu->pivot_threshold = info->dtcol; 333 PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU)); 334 335 lu->flg = DIFFERENT_NONZERO_PATTERN; 336 lu->CleanUpSuperLU = PETSC_TRUE; 337 338 *F = B; 339 PetscFunctionReturn(0); 340 } 341 342 /* used by -sles_view */ 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatSeqAIJFactorInfo_SuperLU" 345 int MatSeqAIJFactorInfo_SuperLU(Mat A,PetscViewer viewer) 346 { 347 Mat_SeqAIJ_SuperLU *lu= (Mat_SeqAIJ_SuperLU*)A->spptr; 348 int ierr; 349 PetscFunctionBegin; 350 351 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr); 352 if(lu->SuperluMatOdering) ierr = PetscViewerASCIIPrintf(viewer," SuperLU mat ordering: %d\n",lu->ispec);CHKERRQ(ierr); 353 354 PetscFunctionReturn(0); 355 } 356 357 EXTERN_C_BEGIN 358 #undef __FUNCT__ 359 #define __FUNCT__ "MatConvert_SuperLU_SeqAIJ" 360 int MatConvert_SuperLU_SeqAIJ(Mat A,MatType type,Mat *newmat) { 361 /* This routine is only called to convert an unfactored PETSc-SuperLU matrix */ 362 /* to its base PETSc type, so we will ignore 'MatType type'. */ 363 int ierr; 364 Mat B=*newmat; 365 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU *)A->spptr; 366 367 PetscFunctionBegin; 368 if (B != A) { 369 /* This routine was inherited from SeqAIJ. */ 370 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 371 } else { 372 /* Reset the original function pointers */ 373 B->ops->view = lu->MatView; 374 B->ops->assemblyend = lu->MatAssemblyEnd; 375 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 376 B->ops->destroy = lu->MatDestroy; 377 /* lu is only a function pointer stash unless we've factored the matrix, which we haven't! */ 378 ierr = PetscFree(lu);CHKERRQ(ierr); 379 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);CHKERRQ(ierr); 380 } 381 *newmat = B; 382 PetscFunctionReturn(0); 383 } 384 EXTERN_C_END 385 386 EXTERN_C_BEGIN 387 #undef __FUNCT__ 388 #define __FUNCT__ "MatConvert_SeqAIJ_SuperLU" 389 int MatConvert_SeqAIJ_SuperLU(Mat A,MatType type,Mat *newmat) { 390 /* This routine is only called to convert to MATSUPERLU */ 391 /* from MATSEQAIJ, so we will ignore 'MatType type'. */ 392 int ierr; 393 Mat B=*newmat; 394 Mat_SeqAIJ_SuperLU *lu; 395 396 PetscFunctionBegin; 397 if (B != A) { 398 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 399 } 400 401 ierr = PetscNew(Mat_SeqAIJ_SuperLU,&lu);CHKERRQ(ierr); 402 lu->MatView = A->ops->view; 403 lu->MatAssemblyEnd = A->ops->assemblyend; 404 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 405 lu->MatDestroy = A->ops->destroy; 406 lu->CleanUpSuperLU = PETSC_FALSE; 407 408 B->spptr = (void*)lu; 409 B->ops->view = MatView_SeqAIJ_SuperLU; 410 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ_SuperLU; 411 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU; 412 B->ops->destroy = MatDestroy_SeqAIJ_SuperLU; 413 414 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_C", 415 "MatConvert_SeqAIJ_SuperLU",MatConvert_SeqAIJ_SuperLU);CHKERRQ(ierr); 416 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_seqaij_C", 417 "MatConvert_SuperLU_SeqAIJ",MatConvert_SuperLU_SeqAIJ);CHKERRQ(ierr); 418 PetscLogInfo(0,"Using SuperLU for SeqAIJ LU factorization and solves."); 419 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSUPERLU);CHKERRQ(ierr); 420 *newmat = B; 421 PetscFunctionReturn(0); 422 } 423 EXTERN_C_END 424 425 EXTERN_C_BEGIN 426 #undef __FUNCT__ 427 #define __FUNCT__ "MatCreate_SeqAIJ_SuperLU" 428 int MatCreate_SeqAIJ_SuperLU(Mat A) { 429 int ierr; 430 431 PetscFunctionBegin; 432 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 433 ierr = MatConvert_SeqAIJ_SuperLU(A,MATSUPERLU,&A);CHKERRQ(ierr); 434 PetscFunctionReturn(0); 435 } 436 EXTERN_C_END 437