1 /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 3 /* 4 Provides an interface to the SuperLU sparse solver 5 Modified for SuperLU 2.0 by Matthew Knepley 6 */ 7 8 #include "src/mat/impls/aij/seq/aij.h" 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #include "zsp_defs.h" 13 #else 14 #include "dsp_defs.h" 15 #endif 16 #include "util.h" 17 EXTERN_C_END 18 19 typedef struct { 20 SuperMatrix A,B,AC,L,U; 21 int *perm_r,*perm_c,ispec,relax,panel_size; 22 double pivot_threshold; 23 NCformat *store; 24 MatStructure flg; 25 PetscTruth SuperluMatOdering; 26 27 /* A few function pointers for inheritance */ 28 int (*MatView)(Mat,PetscViewer); 29 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 30 int (*MatLUFactorSymbolic)(Mat,IS,IS,MatFactorInfo*,Mat*); 31 int (*MatDestroy)(Mat); 32 33 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 34 PetscTruth CleanUpSuperLU; 35 } Mat_SeqAIJ_SuperLU; 36 37 38 EXTERN int MatSeqAIJFactorInfo_SuperLU(Mat,PetscViewer); 39 EXTERN int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat,IS,IS,MatFactorInfo*,Mat*); 40 41 EXTERN_C_BEGIN 42 EXTERN int MatConvert_SuperLU_SeqAIJ(Mat,MatType,Mat*); 43 EXTERN int MatConvert_SeqAIJ_SuperLU(Mat,MatType,Mat*); 44 EXTERN_C_END 45 46 #undef __FUNCT__ 47 #define __FUNCT__ "MatDestroy_SeqAIJ_SuperLU" 48 int MatDestroy_SeqAIJ_SuperLU(Mat A) 49 { 50 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 51 int ierr,(*destroy)(Mat); 52 53 PetscFunctionBegin; 54 /* It looks like this is decreasing the reference count a second time during MatDestroy?! */ 55 /* if (--A->refct > 0)PetscFunctionReturn(0); */ 56 destroy = lu->MatDestroy; 57 ierr = MatConvert_SuperLU_SeqAIJ(A,MATSEQAIJ,&A);CHKERRQ(ierr); 58 ierr = (*destroy)(A);CHKERRQ(ierr); 59 PetscFunctionReturn(0); 60 } 61 62 #undef __FUNCT__ 63 #define __FUNCT__ "MatView_SeqAIJ_Spooles" 64 int MatView_SeqAIJ_SuperLU(Mat A,PetscViewer viewer) 65 { 66 int ierr; 67 PetscTruth isascii; 68 PetscViewerFormat format; 69 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 70 71 PetscFunctionBegin; 72 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 73 74 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 75 if (isascii) { 76 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 77 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 78 ierr = MatSeqAIJFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr); 79 } 80 } 81 PetscFunctionReturn(0); 82 } 83 84 #undef __FUNCT__ 85 #define __FUNCT__ "MatAssemblyEnd_SeqAIJ_SuperLU" 86 int MatAssemblyEnd_SeqAIJ_SuperLU(Mat A,MatAssemblyType mode) { 87 int ierr; 88 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 89 90 PetscFunctionBegin; 91 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 92 93 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 94 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU; 95 PetscFunctionReturn(0); 96 } 97 98 #include "src/mat/impls/dense/seq/dense.h" 99 #undef __FUNCT__ 100 #define __FUNCT__ "MatCreateNull_SeqAIJ_SuperLU" 101 int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat) 102 { 103 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 104 int numRows = A->m,numCols = A->n; 105 SCformat *Lstore; 106 int numNullCols,size; 107 #if defined(PETSC_USE_COMPLEX) 108 doublecomplex *nullVals,*workVals; 109 #else 110 PetscScalar *nullVals,*workVals; 111 #endif 112 int row,newRow,col,newCol,block,b,ierr; 113 114 PetscFunctionBegin; 115 PetscValidHeaderSpecific(A,MAT_COOKIE); 116 PetscValidPointer(nullMat); 117 if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix"); 118 numNullCols = numCols - numRows; 119 if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems"); 120 /* Create the null matrix */ 121 ierr = MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);CHKERRQ(ierr); 122 if (numNullCols == 0) { 123 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 124 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 125 PetscFunctionReturn(0); 126 } 127 #if defined(PETSC_USE_COMPLEX) 128 nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v; 129 #else 130 nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v; 131 #endif 132 /* Copy in the columns */ 133 Lstore = (SCformat*)lu->L.Store; 134 for(block = 0; block <= Lstore->nsuper; block++) { 135 newRow = Lstore->sup_to_col[block]; 136 size = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block]; 137 for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) { 138 newCol = Lstore->rowind[col]; 139 if (newCol >= numRows) { 140 for(b = 0; b < size; b++) 141 #if defined(PETSC_USE_COMPLEX) 142 nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 143 #else 144 nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 145 #endif 146 } 147 } 148 } 149 /* Permute rhs to form P^T_c B */ 150 ierr = PetscMalloc(numRows*sizeof(double),&workVals);CHKERRQ(ierr); 151 for(b = 0; b < numNullCols; b++) { 152 for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row]; 153 for(row = 0; row < numRows; row++) nullVals[b*numRows+row] = workVals[row]; 154 } 155 /* Backward solve the upper triangle A x = b */ 156 for(b = 0; b < numNullCols; b++) { 157 #if defined(PETSC_USE_COMPLEX) 158 sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 159 #else 160 sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 161 #endif 162 if (ierr < 0) 163 SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr); 164 } 165 ierr = PetscFree(workVals);CHKERRQ(ierr); 166 167 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 168 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 169 PetscFunctionReturn(0); 170 } 171 172 #undef __FUNCT__ 173 #define __FUNCT__ "MatSolve_SeqAIJ_SuperLU" 174 int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x) 175 { 176 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 177 PetscScalar *array; 178 int m,ierr; 179 180 PetscFunctionBegin; 181 ierr = VecGetLocalSize(b,&m);CHKERRQ(ierr); 182 ierr = VecCopy(b,x);CHKERRQ(ierr); 183 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 184 /* Create the Rhs */ 185 lu->B.Stype = SLU_DN; 186 lu->B.Mtype = SLU_GE; 187 lu->B.nrow = m; 188 lu->B.ncol = 1; 189 ((DNformat*)lu->B.Store)->lda = m; 190 ((DNformat*)lu->B.Store)->nzval = array; 191 #if defined(PETSC_USE_COMPLEX) 192 lu->B.Dtype = SLU_Z; 193 zgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 194 #else 195 lu->B.Dtype = SLU_D; 196 dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 197 #endif 198 if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 199 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 200 PetscFunctionReturn(0); 201 } 202 203 static int StatInitCalled = 0; 204 205 #undef __FUNCT__ 206 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_SuperLU" 207 int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F) 208 { 209 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(A)->data; 210 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr; 211 int *etree,ierr; 212 PetscTruth flag; 213 214 PetscFunctionBegin; 215 /* Create the SuperMatrix for A^T: 216 Since SuperLU only likes column-oriented matrices,we pass it the transpose, 217 and then solve A^T X = B in MatSolve(). 218 */ 219 220 if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */ 221 lu->A.Stype = SLU_NC; 222 #if defined(PETSC_USE_COMPLEX) 223 lu->A.Dtype = SLU_Z; 224 #else 225 lu->A.Dtype = SLU_D; 226 #endif 227 lu->A.Mtype = SLU_GE; 228 lu->A.nrow = A->n; 229 lu->A.ncol = A->m; 230 231 ierr = PetscMalloc(sizeof(NCformat),&lu->store);CHKERRQ(ierr); 232 ierr = PetscMalloc(sizeof(DNformat),&lu->B.Store);CHKERRQ(ierr); 233 } 234 lu->store->nnz = aa->nz; 235 lu->store->colptr = aa->i; 236 lu->store->rowind = aa->j; 237 lu->store->nzval = aa->a; 238 lu->A.Store = lu->store; 239 240 /* Set SuperLU options */ 241 lu->relax = sp_ienv(2); 242 lu->panel_size = sp_ienv(1); 243 /* We have to initialize global data or SuperLU crashes (sucky design) */ 244 if (!StatInitCalled) { 245 StatInit(lu->panel_size,lu->relax); 246 } 247 StatInitCalled++; 248 249 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); 250 /* use SuperLU mat ordeing */ 251 ierr = PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)\n 0: natural ordering;\n 1: MMD applied to A'*A;\n 2: MMD applied to A'+A;\n 3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);CHKERRQ(ierr); 252 if (flag) { 253 get_perm_c(lu->ispec, &lu->A, lu->perm_c); 254 lu->SuperluMatOdering = PETSC_TRUE; 255 } 256 PetscOptionsEnd(); 257 258 /* Create the elimination tree */ 259 ierr = PetscMalloc(A->n*sizeof(int),&etree);CHKERRQ(ierr); 260 sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC); 261 /* Factor the matrix */ 262 #if defined(PETSC_USE_COMPLEX) 263 zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 264 #else 265 dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 266 #endif 267 if (ierr < 0) { 268 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 269 } else if (ierr > 0) { 270 if (ierr <= A->m) { 271 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr); 272 } else { 273 SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m); 274 } 275 } 276 277 /* Cleanup */ 278 ierr = PetscFree(etree);CHKERRQ(ierr); 279 280 lu->flg = SAME_NONZERO_PATTERN; 281 PetscFunctionReturn(0); 282 } 283 284 /* 285 Note the r permutation is ignored 286 */ 287 #undef __FUNCT__ 288 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_SuperLU" 289 int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 290 { 291 Mat B; 292 Mat_SeqAIJ_SuperLU *lu; 293 int ierr,*ca; 294 295 PetscFunctionBegin; 296 297 ierr = MatCreate(A->comm,A->m,A->n,PETSC_DETERMINE,PETSC_DETERMINE,&B);CHKERRQ(ierr); 298 ierr = MatSetType(B,MATSUPERLU);CHKERRQ(ierr); 299 ierr = MatSeqAIJSetPreallocation(B,0,PETSC_NULL);CHKERRQ(ierr); 300 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU; 301 B->ops->solve = MatSolve_SeqAIJ_SuperLU; 302 B->factor = FACTOR_LU; 303 B->assembled = PETSC_TRUE; /* required by -sles_view */ 304 305 lu = (Mat_SeqAIJ_SuperLU*)(B->spptr); 306 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU", 307 (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);CHKERRQ(ierr); 308 309 /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */ 310 ierr = PetscMalloc(A->n*sizeof(int),&lu->perm_r);CHKERRQ(ierr); 311 ierr = PetscMalloc(A->m*sizeof(int),&lu->perm_c);CHKERRQ(ierr); 312 313 /* use PETSc mat ordering */ 314 ierr = ISGetIndices(c,&ca);CHKERRQ(ierr); 315 ierr = PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));CHKERRQ(ierr); 316 ierr = ISRestoreIndices(c,&ca);CHKERRQ(ierr); 317 lu->SuperluMatOdering = PETSC_FALSE; 318 319 lu->pivot_threshold = info->dtcol; 320 PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU)); 321 322 lu->flg = DIFFERENT_NONZERO_PATTERN; 323 lu->CleanUpSuperLU = PETSC_TRUE; 324 325 *F = B; 326 PetscFunctionReturn(0); 327 } 328 329 /* used by -sles_view */ 330 #undef __FUNCT__ 331 #define __FUNCT__ "MatSeqAIJFactorInfo_SuperLU" 332 int MatSeqAIJFactorInfo_SuperLU(Mat A,PetscViewer viewer) 333 { 334 Mat_SeqAIJ_SuperLU *lu= (Mat_SeqAIJ_SuperLU*)A->spptr; 335 int ierr; 336 PetscFunctionBegin; 337 338 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr); 339 if(lu->SuperluMatOdering) ierr = PetscViewerASCIIPrintf(viewer," SuperLU mat ordering: %d\n",lu->ispec);CHKERRQ(ierr); 340 341 PetscFunctionReturn(0); 342 } 343 344 EXTERN_C_BEGIN 345 #undef __FUNCT__ 346 #define __FUNCT__ "MatConvert_SuperLU_SeqAIJ" 347 int MatConvert_SuperLU_SeqAIJ(Mat A,MatType type,Mat *newmat) { 348 int ierr; 349 Mat B=*newmat; 350 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU *)A->spptr; 351 352 PetscFunctionBegin; 353 if (B != A) { 354 /* This routine was inherited from SeqAIJ. */ 355 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 356 } else { 357 if (lu->CleanUpSuperLU) { 358 /* We have to free the global data or SuperLU crashes (sucky design)*/ 359 /* Since we don't know if more solves on other matrices may be done 360 we cannot free the yucky SuperLU global data 361 StatFree(); 362 */ 363 364 /* Free the SuperLU datastructures */ 365 Destroy_CompCol_Permuted(&lu->AC); 366 Destroy_SuperNode_Matrix(&lu->L); 367 Destroy_CompCol_Matrix(&lu->U); 368 ierr = PetscFree(lu->B.Store);CHKERRQ(ierr); 369 ierr = PetscFree(lu->A.Store);CHKERRQ(ierr); 370 ierr = PetscFree(lu->perm_r);CHKERRQ(ierr); 371 ierr = PetscFree(lu->perm_c);CHKERRQ(ierr); 372 } 373 /* Reset the original function pointers */ 374 B->ops->view = lu->MatView; 375 B->ops->assemblyend = lu->MatAssemblyEnd; 376 B->ops->lufactorsymbolic = lu->MatLUFactorSymbolic; 377 B->ops->destroy = lu->MatDestroy; 378 /* lu is only a function pointer stash unless we've factored the matrix, which we haven't! */ 379 ierr = PetscFree(lu);CHKERRQ(ierr); 380 ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQAIJ);CHKERRQ(ierr); 381 } 382 *newmat = B; 383 PetscFunctionReturn(0); 384 } 385 EXTERN_C_END 386 387 EXTERN_C_BEGIN 388 #undef __FUNCT__ 389 #define __FUNCT__ "MatConvert_SeqAIJ_SuperLU" 390 int MatConvert_SeqAIJ_SuperLU(Mat A,MatType type,Mat *newmat) { 391 int ierr; 392 Mat B=*newmat; 393 Mat_SeqAIJ_SuperLU *lu; 394 395 PetscFunctionBegin; 396 if (B != A) { 397 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 398 } 399 400 ierr = PetscNew(Mat_SeqAIJ_SuperLU,&lu);CHKERRQ(ierr); 401 lu->MatView = A->ops->view; 402 lu->MatAssemblyEnd = A->ops->assemblyend; 403 lu->MatLUFactorSymbolic = A->ops->lufactorsymbolic; 404 lu->MatDestroy = A->ops->destroy; 405 lu->CleanUpSuperLU = PETSC_FALSE; 406 407 B->spptr = (void*)lu; 408 B->ops->view = MatView_SeqAIJ_SuperLU; 409 B->ops->assemblyend = MatAssemblyEnd_SeqAIJ_SuperLU; 410 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU; 411 B->ops->destroy = MatDestroy_SeqAIJ_SuperLU; 412 413 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqaij_superlu_C", 414 "MatConvert_SeqAIJ_SuperLU",MatConvert_SeqAIJ_SuperLU);CHKERRQ(ierr); 415 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_superlu_seqaij_C", 416 "MatConvert_SuperLU_SeqAIJ",MatConvert_SuperLU_SeqAIJ);CHKERRQ(ierr); 417 ierr = PetscObjectChangeTypeName((PetscObject)B,type);CHKERRQ(ierr); 418 *newmat = B; 419 PetscFunctionReturn(0); 420 } 421 EXTERN_C_END 422 423 EXTERN_C_BEGIN 424 #undef __FUNCT__ 425 #define __FUNCT__ "MatCreate_SeqAIJ_SuperLU" 426 int MatCreate_SeqAIJ_SuperLU(Mat A) { 427 int ierr; 428 429 PetscFunctionBegin; 430 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 431 ierr = MatConvert_SeqAIJ_SuperLU(A,MATSUPERLU,&A);CHKERRQ(ierr); 432 PetscFunctionReturn(0); 433 } 434 EXTERN_C_END 435