1 /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 3 /* 4 Provides an interface to the SuperLU sparse solver 5 Modified for SuperLU 2.0 by Matthew Knepley 6 */ 7 8 #include "src/mat/impls/aij/seq/aij.h" 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #include "zsp_defs.h" 13 #else 14 #include "dsp_defs.h" 15 #endif 16 #include "util.h" 17 EXTERN_C_END 18 19 typedef struct { 20 SuperMatrix A,B,AC,L,U; 21 int *perm_r,*perm_c,ispec,relax,panel_size; 22 double pivot_threshold; 23 NCformat *store; 24 MatStructure flg; 25 PetscTruth SuperluMatOdering; 26 27 /* A few function pointers for inheritance */ 28 int (*MatView)(Mat,PetscViewer); 29 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 30 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 31 int (*MatDestroy)(Mat); 32 33 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 34 PetscTruth CleanUpSuperLU; 35 } Mat_SeqAIJ_SuperLU; 36 37 38 EXTERN int MatSeqAIJFactorInfo_SuperLU(Mat,PetscViewer); 39 EXTERN int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat,IS,IS,MatFactorInfo*,Mat*); 40 41 EXTERN_C_BEGIN 42 EXTERN int MatConvert_SuperLU_SeqAIJ(Mat,MatType,Mat*); 43 EXTERN int MatConvert_SeqAIJ_SuperLU(Mat,MatType,Mat*); 44 EXTERN_C_END 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatDestroy_SeqAIJ_SuperLU" 48 int MatDestroy_SeqAIJ_SuperLU(Mat A) 49 { 50 int ierr; 51 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 52 int (*destroy)(Mat)=lu->MatDestroy; 53 54 PetscFunctionBegin; 55 ierr = MatConvert_SuperLU_SeqAIJ(A,MATSEQAIJ,&A);CHKERRQ(ierr); 56 ierr = (*destroy)(A);CHKERRQ(ierr); 57 PetscFunctionReturn(0); 58 } 59 60 #undef __FUNCT__ 61 #define __FUNCT__ "MatView_SeqAIJ_Spooles" 62 int MatView_SeqAIJ_SuperLU(Mat A,PetscViewer viewer) 63 { 64 int ierr; 65 PetscTruth isascii; 66 PetscViewerFormat format; 67 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 68 69 PetscFunctionBegin; 70 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 71 72 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 73 if (isascii) { 74 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 75 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 76 ierr = MatSeqAIJFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr); 77 } 78 } 79 PetscFunctionReturn(0); 80 } 81 82 #undef __FUNCT__ 83 #define __FUNCT__ "MatAssemblyEnd_SeqAIJ_SuperLU" 84 int MatAssemblyEnd_SeqAIJ_SuperLU(Mat A,MatAssemblyType mode) { 85 int ierr; 86 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 87 88 PetscFunctionBegin; 89 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 90 91 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 92 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU; 93 PetscFunctionReturn(0); 94 } 95 96 #include "src/mat/impls/dense/seq/dense.h" 97 #undef __FUNCT__ 98 #define __FUNCT__ "MatCreateNull_SeqAIJ_SuperLU" 99 int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat) 100 { 101 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 102 int numRows = A->m,numCols = A->n; 103 SCformat *Lstore; 104 int numNullCols,size; 105 #if defined(PETSC_USE_COMPLEX) 106 doublecomplex *nullVals,*workVals; 107 #else 108 PetscScalar *nullVals,*workVals; 109 #endif 110 int row,newRow,col,newCol,block,b,ierr; 111 112 PetscFunctionBegin; 113 PetscValidHeaderSpecific(A,MAT_COOKIE); 114 PetscValidPointer(nullMat); 115 if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix"); 116 numNullCols = numCols - numRows; 117 if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems"); 118 /* Create the null matrix */ 119 ierr = MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);CHKERRQ(ierr); 120 if (numNullCols == 0) { 121 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 122 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 123 PetscFunctionReturn(0); 124 } 125 #if defined(PETSC_USE_COMPLEX) 126 nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v; 127 #else 128 nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v; 129 #endif 130 /* Copy in the columns */ 131 Lstore = (SCformat*)lu->L.Store; 132 for(block = 0; block <= Lstore->nsuper; block++) { 133 newRow = Lstore->sup_to_col[block]; 134 size = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block]; 135 for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) { 136 newCol = Lstore->rowind[col]; 137 if (newCol >= numRows) { 138 for(b = 0; b < size; b++) 139 #if defined(PETSC_USE_COMPLEX) 140 nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 141 #else 142 nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 143 #endif 144 } 145 } 146 } 147 /* Permute rhs to form P^T_c B */ 148 ierr = PetscMalloc(numRows*sizeof(double),&workVals);CHKERRQ(ierr); 149 for(b = 0; b < numNullCols; b++) { 150 for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row]; 151 for(row = 0; row < numRows; row++) nullVals[b*numRows+row] = workVals[row]; 152 } 153 /* Backward solve the upper triangle A x = b */ 154 for(b = 0; b < numNullCols; b++) { 155 #if defined(PETSC_USE_COMPLEX) 156 sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 157 #else 158 sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 159 #endif 160 if (ierr < 0) 161 SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr); 162 } 163 ierr = PetscFree(workVals);CHKERRQ(ierr); 164 165 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 166 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 167 PetscFunctionReturn(0); 168 } 169 170 #undef __FUNCT__ 171 #define __FUNCT__ "MatSolve_SeqAIJ_SuperLU" 172 int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x) 173 { 174 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 175 PetscScalar *array; 176 int m,ierr; 177 178 PetscFunctionBegin; 179 ierr = VecGetLocalSize(b,&m);CHKERRQ(ierr); 180 ierr = VecCopy(b,x);CHKERRQ(ierr); 181 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 182 /* Create the Rhs */ 183 lu->B.Stype = SLU_DN; 184 lu->B.Mtype = SLU_GE; 185 lu->B.nrow = m; 186 lu->B.ncol = 1; 187 ((DNformat*)lu->B.Store)->lda = m; 188 ((DNformat*)lu->B.Store)->nzval = array; 189 #if defined(PETSC_USE_COMPLEX) 190 lu->B.Dtype = SLU_Z; 191 zgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 192 #else 193 lu->B.Dtype = SLU_D; 194 dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 195 #endif 196 if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 197 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 198 PetscFunctionReturn(0); 199 } 200 201 static int StatInitCalled = 0; 202 203 #undef __FUNCT__ 204 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_SuperLU" 205 int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F) 206 { 207 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(A)->data; 208 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr; 209 int *etree,ierr; 210 PetscTruth flag; 211 212 PetscFunctionBegin; 213 /* Create the SuperMatrix for A^T: 214 Since SuperLU only likes column-oriented matrices,we pass it the transpose, 215 and then solve A^T X = B in MatSolve(). 216 */ 217 218 if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */ 219 lu->A.Stype = SLU_NC; 220 #if defined(PETSC_USE_COMPLEX) 221 lu->A.Dtype = SLU_Z; 222 #else 223 lu->A.Dtype = SLU_D; 224 #endif 225 lu->A.Mtype = SLU_GE; 226 lu->A.nrow = A->n; 227 lu->A.ncol = A->m; 228 229 ierr = PetscMalloc(sizeof(NCformat),&lu->store);CHKERRQ(ierr); 230 ierr = PetscMalloc(sizeof(DNformat),&lu->B.Store);CHKERRQ(ierr); 231 } 232 lu->store->nnz = aa->nz; 233 lu->store->colptr = aa->i; 234 lu->store->rowind = aa->j; 235 lu->store->nzval = aa->a; 236 lu->A.Store = lu->store; 237 238 /* Set SuperLU options */ 239 lu->relax = sp_ienv(2); 240 lu->panel_size = sp_ienv(1); 241 /* We have to initialize global data or SuperLU crashes (sucky design) */ 242 if (!StatInitCalled) { 243 StatInit(lu->panel_size,lu->relax); 244 } 245 StatInitCalled++; 246 247 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); 248 /* use SuperLU mat ordeing */ 249 ierr = PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)\n 0: natural ordering;\n 1: MMD applied to A'*A;\n 2: MMD applied to A'+A;\n 3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);CHKERRQ(ierr); 250 if (flag) { 251 get_perm_c(lu->ispec, &lu->A, lu->perm_c); 252 lu->SuperluMatOdering = PETSC_TRUE; 253 } 254 PetscOptionsEnd(); 255 256 /* Create the elimination tree */ 257 ierr = PetscMalloc(A->n*sizeof(int),&etree);CHKERRQ(ierr); 258 sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC); 259 /* Factor the matrix */ 260 #if defined(PETSC_USE_COMPLEX) 261 zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 262 #else 263 dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 264 #endif 265 if (ierr < 0) { 266 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 267 } else if (ierr > 0) { 268 if (ierr <= A->m) { 269 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr); 270 } else { 271 SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m); 272 } 273 } 274 275 /* Cleanup */ 276 ierr = PetscFree(etree);CHKERRQ(ierr); 277 278 lu->flg = SAME_NONZERO_PATTERN; 279 PetscFunctionReturn(0); 280 } 281 282 /* 283 Note the r permutation is ignored 284 */ 285 #undef __FUNCT__ 286 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_SuperLU" 287 int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 288 { 289 Mat B; 290 Mat_SeqAIJ_SuperLU *lu; 291 int ierr,*ca; 292 293 PetscFunctionBegin; 294 295 ierr = MatCreate(A->comm,A->m,A->n,PETSC_DETERMINE,PETSC_DETERMINE,&B);CHKERRQ(ierr); 296 ierr = MatSetType(B,MATSUPERLU);CHKERRQ(ierr); 297 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 298 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU; 299 B->ops->solve = MatSolve_SeqAIJ_SuperLU; 300 B->factor = FACTOR_LU; 301 B->assembled = PETSC_TRUE; /* required by -sles_view */ 302 303 lu = (Mat_SeqAIJ_SuperLU*)(B->spptr); 304 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU", 305 (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);CHKERRQ(ierr); 306 307 /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */ 308 ierr = PetscMalloc(A->n*sizeof(int),&lu->perm_r);CHKERRQ(ierr); 309 ierr = PetscMalloc(A->m*sizeof(int),&lu->perm_c);CHKERRQ(ierr); 310 311 /* use PETSc mat ordering */ 312 ierr = ISGetIndices(c,&ca);CHKERRQ(ierr); 313 ierr = PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));CHKERRQ(ierr); 314 ierr = ISRestoreIndices(c,&ca);CHKERRQ(ierr); 315 lu->SuperluMatOdering = PETSC_FALSE; 316 317 lu->pivot_threshold = info->dtcol; 318 PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU)); 319 320 lu->flg = DIFFERENT_NONZERO_PATTERN; 321 lu->CleanUpSuperLU = PETSC_TRUE; 322 323 *F = B; 324 PetscFunctionReturn(0); 325 } 326 327 /* used by -sles_view */ 328 #undef __FUNCT__ 329 #define __FUNCT__ "MatSeqAIJFactorInfo_SuperLU" 330 int MatSeqAIJFactorInfo_SuperLU(Mat A,PetscViewer viewer) 331 { 332 Mat_SeqAIJ_SuperLU *lu= (Mat_SeqAIJ_SuperLU*)A->spptr; 333 int ierr; 334 PetscFunctionBegin; 335 336 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr); 337 if(lu->SuperluMatOdering) ierr = PetscViewerASCIIPrintf(viewer," SuperLU mat ordering: %d\n",lu->ispec);CHKERRQ(ierr); 338 339 PetscFunctionReturn(0); 340 } 341 342 EXTERN_C_BEGIN 343 #undef __FUNCT__ 344 #define __FUNCT__ "MatConvert_SuperLU_SeqAIJ" 345 int MatConvert_SuperLU_SeqAIJ(Mat A,MatType type,Mat *newmat) { 346 int ierr; 347 Mat B=*newmat; 348 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU *)A->spptr; 349 350 PetscFunctionBegin; 351 if (B != A) { 352 /* This routine was inherited from SeqAIJ. */ 353 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 354 } else { 355 if (lu->CleanUpSuperLU) { 356 /* We have to free the global data or SuperLU crashes (sucky design)*/ 357 /* Since we don't know if more solves on other matrices may be done 358 we cannot free the yucky SuperLU global data 359 StatFree(); 360 */ 361 362 /* Free the SuperLU datastructures */ 363 Destroy_CompCol_Permuted(&lu->AC); 364 Destroy_SuperNode_Matrix(&lu->L); 365 Destroy_CompCol_Matrix(&lu->U); 366 ierr = PetscFree(lu->B.Store);CHKERRQ(ierr); 367 ierr = PetscFree(lu->A.Store);CHKERRQ(ierr); 368 ierr = PetscFree(lu->perm_r);CHKERRQ(ierr); 369 ierr = PetscFree(lu->perm_c);CHKERRQ(ierr); 370 } 371 /* Reset the original function pointers */ 372 B->ops->view = lu->MatView; 373 B->ops->assemblyend = lu->MatAssemblyEnd; 374 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 375 B->ops->destroy = lu->MatDestroy; 376 /* lu is only a function pointer stash unless we've factored the matrix, which we haven't! */ 377 ierr = PetscFree(lu);CHKERRQ(ierr); 378 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);CHKERRQ(ierr); 379 } 380 *newmat = B; 381 PetscFunctionReturn(0); 382 } 383 EXTERN_C_END 384 385 EXTERN_C_BEGIN 386 #undef __FUNCT__ 387 #define __FUNCT__ "MatConvert_SeqAIJ_SuperLU" 388 int MatConvert_SeqAIJ_SuperLU(Mat A,MatType type,Mat *newmat) { 389 int ierr; 390 Mat B=*newmat; 391 Mat_SeqAIJ_SuperLU *lu; 392 393 PetscFunctionBegin; 394 if (B != A) { 395 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 396 } 397 398 ierr = PetscNew(Mat_SeqAIJ_SuperLU,&lu);CHKERRQ(ierr); 399 lu->MatView = A->ops->view; 400 lu->MatAssemblyEnd = A->ops->assemblyend; 401 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 402 lu->MatDestroy = A->ops->destroy; 403 lu->CleanUpSuperLU = PETSC_FALSE; 404 405 B->spptr = (void*)lu; 406 B->ops->view = MatView_SeqAIJ_SuperLU; 407 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ_SuperLU; 408 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU; 409 B->ops->destroy = MatDestroy_SeqAIJ_SuperLU; 410 411 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_C", 412 "MatConvert_SeqAIJ_SuperLU",MatConvert_SeqAIJ_SuperLU);CHKERRQ(ierr); 413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_seqaij_C", 414 "MatConvert_SuperLU_SeqAIJ",MatConvert_SuperLU_SeqAIJ);CHKERRQ(ierr); 415 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 416 *newmat = B; 417 PetscFunctionReturn(0); 418 } 419 EXTERN_C_END 420 421 EXTERN_C_BEGIN 422 #undef __FUNCT__ 423 #define __FUNCT__ "MatCreate_SeqAIJ_SuperLU" 424 int MatCreate_SeqAIJ_SuperLU(Mat A) { 425 int ierr; 426 427 PetscFunctionBegin; 428 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 429 ierr = MatConvert_SeqAIJ_SuperLU(A,MATSUPERLU,&A);CHKERRQ(ierr); 430 PetscFunctionReturn(0); 431 } 432 EXTERN_C_END 433