1 /*$Id: superlu.c,v 1.10 2001/08/15 15:56:50 bsmith Exp $*/ 2 3 /* 4 Provides an interface to the SuperLU sparse solver 5 Modified for SuperLU 2.0 by Matthew Knepley 6 */ 7 8 #include "src/mat/impls/aij/seq/aij.h" 9 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #include "zsp_defs.h" 13 #else 14 #include "dsp_defs.h" 15 #endif 16 #include "util.h" 17 EXTERN_C_END 18 19 typedef struct { 20 SuperMatrix A,B,AC,L,U; 21 int *perm_r,*perm_c,ispec,relax,panel_size; 22 double pivot_threshold; 23 NCformat *store; 24 MatStructure flg; 25 PetscTruth SuperluMatOdering; 26 27 /* A few function pointers for inheritance */ 28 int (*MatView)(Mat,PetscViewer); 29 int (*MatAssemblyEnd)(Mat,MatAssemblyType); 30 int (*MatDestroy)(Mat); 31 32 /* Flag to clean up (non-global) SuperLU objects during Destroy */ 33 PetscTruth CleanUpSuperLU; 34 } Mat_SeqAIJ_SuperLU; 35 36 37 extern int MatDestroy_SeqAIJ(Mat); 38 39 #undef __FUNCT__ 40 #define __FUNCT__ "MatDestroy_SeqAIJ_SuperLU" 41 int MatDestroy_SeqAIJ_SuperLU(Mat A) 42 { 43 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 44 45 int ierr,(*destroy)(Mat); 46 47 PetscFunctionBegin; 48 /* It looks like this is decreasing the reference count a second time during MatDestroy?! */ 49 if (--A->refct > 0)PetscFunctionReturn(0); 50 /* We have to free the global data or SuperLU crashes (sucky design)*/ 51 /* Since we don't know if more solves on other matrices may be done 52 we cannot free the yucky SuperLU global data 53 StatFree(); 54 */ 55 56 /* Free the SuperLU datastructures */ 57 if (lu->CleanUpSuperLU) { 58 Destroy_CompCol_Permuted(&lu->AC); 59 Destroy_SuperNode_Matrix(&lu->L); 60 Destroy_CompCol_Matrix(&lu->U); 61 ierr = PetscFree(lu->B.Store);CHKERRQ(ierr); 62 ierr = PetscFree(lu->A.Store);CHKERRQ(ierr); 63 ierr = PetscFree(lu->perm_r);CHKERRQ(ierr); 64 ierr = PetscFree(lu->perm_c);CHKERRQ(ierr); 65 } 66 67 destroy = lu->MatDestroy; 68 ierr = PetscFree(lu);CHKERRQ(ierr); 69 ierr = (*destroy)(A);CHKERRQ(ierr); 70 PetscFunctionReturn(0); 71 } 72 73 #undef __FUNCT__ 74 #define __FUNCT__ "MatView_SeqAIJ_Spooles" 75 int MatView_SeqAIJ_SuperLU(Mat A,PetscViewer viewer) 76 { 77 int ierr; 78 PetscTruth isascii; 79 PetscViewerFormat format; 80 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 81 82 PetscFunctionBegin; 83 ierr = (*lu->MatView)(A,viewer);CHKERRQ(ierr); 84 85 ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr); 86 if (isascii) { 87 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 88 if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 89 ierr = MatSeqAIJFactorInfo_SuperLU(A,viewer);CHKERRQ(ierr); 90 } 91 } 92 PetscFunctionReturn(0); 93 } 94 95 #undef __FUNCT__ 96 #define __FUNCT__ "MatAssemblyEnd_SeqAIJ_SuperLU" 97 int MatAssemblyEnd_SeqAIJ_SuperLU(Mat A,MatAssemblyType mode) { 98 int ierr; 99 Mat_SeqAIJ_SuperLU *lu=(Mat_SeqAIJ_SuperLU*)(A->spptr); 100 101 PetscFunctionBegin; 102 ierr = (*lu->MatAssemblyEnd)(A,mode);CHKERRQ(ierr); 103 ierr = MatUseSuperLU_SeqAIJ(A);CHKERRQ(ierr); 104 PetscFunctionReturn(0); 105 } 106 107 #include "src/mat/impls/dense/seq/dense.h" 108 #undef __FUNCT__ 109 #define __FUNCT__ "MatCreateNull_SeqAIJ_SuperLU" 110 int MatCreateNull_SeqAIJ_SuperLU(Mat A,Mat *nullMat) 111 { 112 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 113 int numRows = A->m,numCols = A->n; 114 SCformat *Lstore; 115 int numNullCols,size; 116 #if defined(PETSC_USE_COMPLEX) 117 doublecomplex *nullVals,*workVals; 118 #else 119 PetscScalar *nullVals,*workVals; 120 #endif 121 int row,newRow,col,newCol,block,b,ierr; 122 123 PetscFunctionBegin; 124 PetscValidHeaderSpecific(A,MAT_COOKIE); 125 PetscValidPointer(nullMat); 126 if (!A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Unfactored matrix"); 127 numNullCols = numCols - numRows; 128 if (numNullCols < 0) SETERRQ(PETSC_ERR_ARG_WRONG,"Function only applies to underdetermined problems"); 129 /* Create the null matrix */ 130 ierr = MatCreateSeqDense(A->comm,numRows,numNullCols,PETSC_NULL,nullMat);CHKERRQ(ierr); 131 if (numNullCols == 0) { 132 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 133 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 134 PetscFunctionReturn(0); 135 } 136 #if defined(PETSC_USE_COMPLEX) 137 nullVals = (doublecomplex*)((Mat_SeqDense*)(*nullMat)->data)->v; 138 #else 139 nullVals = ((Mat_SeqDense*)(*nullMat)->data)->v; 140 #endif 141 /* Copy in the columns */ 142 Lstore = (SCformat*)lu->L.Store; 143 for(block = 0; block <= Lstore->nsuper; block++) { 144 newRow = Lstore->sup_to_col[block]; 145 size = Lstore->sup_to_col[block+1] - Lstore->sup_to_col[block]; 146 for(col = Lstore->rowind_colptr[newRow]; col < Lstore->rowind_colptr[newRow+1]; col++) { 147 newCol = Lstore->rowind[col]; 148 if (newCol >= numRows) { 149 for(b = 0; b < size; b++) 150 #if defined(PETSC_USE_COMPLEX) 151 nullVals[(newCol-numRows)*numRows+newRow+b] = ((doublecomplex*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 152 #else 153 nullVals[(newCol-numRows)*numRows+newRow+b] = ((double*)Lstore->nzval)[Lstore->nzval_colptr[newRow+b]+col]; 154 #endif 155 } 156 } 157 } 158 /* Permute rhs to form P^T_c B */ 159 ierr = PetscMalloc(numRows*sizeof(double),&workVals);CHKERRQ(ierr); 160 for(b = 0; b < numNullCols; b++) { 161 for(row = 0; row < numRows; row++) workVals[lu->perm_c[row]] = nullVals[b*numRows+row]; 162 for(row = 0; row < numRows; row++) nullVals[b*numRows+row] = workVals[row]; 163 } 164 /* Backward solve the upper triangle A x = b */ 165 for(b = 0; b < numNullCols; b++) { 166 #if defined(PETSC_USE_COMPLEX) 167 sp_ztrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 168 #else 169 sp_dtrsv("L","T","U",&lu->L,&lu->U,&nullVals[b*numRows],&ierr); 170 #endif 171 if (ierr < 0) 172 SETERRQ1(PETSC_ERR_ARG_WRONG,"The argument %d was invalid",-ierr); 173 } 174 ierr = PetscFree(workVals);CHKERRQ(ierr); 175 176 ierr = MatAssemblyBegin(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 177 ierr = MatAssemblyEnd(*nullMat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 178 PetscFunctionReturn(0); 179 } 180 181 #undef __FUNCT__ 182 #define __FUNCT__ "MatSolve_SeqAIJ_SuperLU" 183 int MatSolve_SeqAIJ_SuperLU(Mat A,Vec b,Vec x) 184 { 185 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)A->spptr; 186 PetscScalar *array; 187 int m,ierr; 188 189 PetscFunctionBegin; 190 ierr = VecGetLocalSize(b,&m);CHKERRQ(ierr); 191 ierr = VecCopy(b,x);CHKERRQ(ierr); 192 ierr = VecGetArray(x,&array);CHKERRQ(ierr); 193 /* Create the Rhs */ 194 lu->B.Stype = SLU_DN; 195 lu->B.Mtype = SLU_GE; 196 lu->B.nrow = m; 197 lu->B.ncol = 1; 198 ((DNformat*)lu->B.Store)->lda = m; 199 ((DNformat*)lu->B.Store)->nzval = array; 200 #if defined(PETSC_USE_COMPLEX) 201 lu->B.Dtype = SLU_Z; 202 zgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 203 #else 204 lu->B.Dtype = SLU_D; 205 dgstrs("T",&lu->L,&lu->U,lu->perm_r,lu->perm_c,&lu->B,&ierr); 206 #endif 207 if (ierr < 0) SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 208 ierr = VecRestoreArray(x,&array);CHKERRQ(ierr); 209 PetscFunctionReturn(0); 210 } 211 212 static int StatInitCalled = 0; 213 214 #undef __FUNCT__ 215 #define __FUNCT__ "MatLUFactorNumeric_SeqAIJ_SuperLU" 216 int MatLUFactorNumeric_SeqAIJ_SuperLU(Mat A,Mat *F) 217 { 218 Mat_SeqAIJ *aa = (Mat_SeqAIJ*)(A)->data; 219 Mat_SeqAIJ_SuperLU *lu = (Mat_SeqAIJ_SuperLU*)(*F)->spptr; 220 int *etree,i,ierr; 221 PetscTruth flag; 222 223 PetscFunctionBegin; 224 /* Create the SuperMatrix for A^T: 225 Since SuperLU only likes column-oriented matrices,we pass it the transpose, 226 and then solve A^T X = B in MatSolve(). 227 */ 228 229 if ( lu->flg == DIFFERENT_NONZERO_PATTERN){ /* first numerical factorization */ 230 lu->A.Stype = SLU_NC; 231 #if defined(PETSC_USE_COMPLEX) 232 lu->A.Dtype = SLU_Z; 233 #else 234 lu->A.Dtype = SLU_D; 235 #endif 236 lu->A.Mtype = SLU_GE; 237 lu->A.nrow = A->n; 238 lu->A.ncol = A->m; 239 240 ierr = PetscMalloc(sizeof(NCformat),&lu->store);CHKERRQ(ierr); 241 ierr = PetscMalloc(sizeof(DNformat),&lu->B.Store);CHKERRQ(ierr); 242 } 243 lu->store->nnz = aa->nz; 244 lu->store->colptr = aa->i; 245 lu->store->rowind = aa->j; 246 lu->store->nzval = aa->a; 247 lu->A.Store = lu->store; 248 249 /* Set SuperLU options */ 250 lu->relax = sp_ienv(2); 251 lu->panel_size = sp_ienv(1); 252 /* We have to initialize global data or SuperLU crashes (sucky design) */ 253 if (!StatInitCalled) { 254 StatInit(lu->panel_size,lu->relax); 255 } 256 StatInitCalled++; 257 258 ierr = PetscOptionsBegin(A->comm,A->prefix,"SuperLU Options","Mat");CHKERRQ(ierr); 259 /* use SuperLU mat ordeing */ 260 ierr = PetscOptionsInt("-mat_superlu_ordering","SuperLU ordering type (one of 0, 1, 2, 3)\n 0: natural ordering;\n 1: MMD applied to A'*A;\n 2: MMD applied to A'+A;\n 3: COLAMD, approximate minimum degree column ordering","None",lu->ispec,&lu->ispec,&flag);CHKERRQ(ierr); 261 if (flag) { 262 get_perm_c(lu->ispec, &lu->A, lu->perm_c); 263 lu->SuperluMatOdering = PETSC_TRUE; 264 } 265 PetscOptionsEnd(); 266 267 /* Create the elimination tree */ 268 ierr = PetscMalloc(A->n*sizeof(int),&etree);CHKERRQ(ierr); 269 sp_preorder("N",&lu->A,lu->perm_c,etree,&lu->AC); 270 /* Factor the matrix */ 271 #if defined(PETSC_USE_COMPLEX) 272 zgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 273 #else 274 dgstrf("N",&lu->AC,lu->pivot_threshold,0.0,lu->relax,lu->panel_size,etree,PETSC_NULL,0,lu->perm_r,lu->perm_c,&lu->L,&lu->U,&ierr); 275 #endif 276 if (ierr < 0) { 277 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element of row %d was invalid",-ierr); 278 } else if (ierr > 0) { 279 if (ierr <= A->m) { 280 SETERRQ1(PETSC_ERR_ARG_WRONG,"The diagonal element %d of U is exactly zero",ierr); 281 } else { 282 SETERRQ1(PETSC_ERR_ARG_WRONG,"Memory allocation failure after %d bytes were allocated",ierr-A->m); 283 } 284 } 285 286 /* Cleanup */ 287 ierr = PetscFree(etree);CHKERRQ(ierr); 288 289 lu->flg = SAME_NONZERO_PATTERN; 290 PetscFunctionReturn(0); 291 } 292 293 /* 294 Note the r permutation is ignored 295 */ 296 #undef __FUNCT__ 297 #define __FUNCT__ "MatLUFactorSymbolic_SeqAIJ_SuperLU" 298 int MatLUFactorSymbolic_SeqAIJ_SuperLU(Mat A,IS r,IS c,MatFactorInfo *info,Mat *F) 299 { 300 Mat B; 301 Mat_SeqAIJ_SuperLU *lu; 302 int ierr,*ca; 303 304 PetscFunctionBegin; 305 306 ierr = MatCreateSeqAIJ(A->comm,A->m,A->n,0,PETSC_NULL,F);CHKERRQ(ierr); 307 B = *F; 308 B->ops->lufactornumeric = MatLUFactorNumeric_SeqAIJ_SuperLU; 309 B->ops->solve = MatSolve_SeqAIJ_SuperLU; 310 B->ops->destroy = MatDestroy_SeqAIJ_SuperLU; 311 B->factor = FACTOR_LU; 312 (*F)->assembled = PETSC_TRUE; /* required by -sles_view */ 313 314 ierr = PetscNew(Mat_SeqAIJ_SuperLU,&lu);CHKERRQ(ierr); 315 B->spptr = (void*)lu; 316 ierr = PetscObjectComposeFunction((PetscObject)B,"MatCreateNull","MatCreateNull_SeqAIJ_SuperLU", 317 (void(*)(void))MatCreateNull_SeqAIJ_SuperLU);CHKERRQ(ierr); 318 319 /* Allocate the work arrays required by SuperLU (notice sizes are for the transpose) */ 320 ierr = PetscMalloc(A->n*sizeof(int),&lu->perm_r);CHKERRQ(ierr); 321 ierr = PetscMalloc(A->m*sizeof(int),&lu->perm_c);CHKERRQ(ierr); 322 323 /* use PETSc mat ordering */ 324 ierr = ISGetIndices(c,&ca);CHKERRQ(ierr); 325 ierr = PetscMemcpy(lu->perm_c,ca,A->m*sizeof(int));CHKERRQ(ierr); 326 ierr = ISRestoreIndices(c,&ca);CHKERRQ(ierr); 327 lu->SuperluMatOdering = PETSC_FALSE; 328 329 lu->pivot_threshold = info->dtcol; 330 PetscLogObjectMemory(B,(A->m+A->n)*sizeof(int)+sizeof(Mat_SeqAIJ_SuperLU)); 331 332 lu->flg = DIFFERENT_NONZERO_PATTERN; 333 lu->CleanUpSuperLU = PETSC_TRUE; 334 PetscFunctionReturn(0); 335 } 336 337 #undef __FUNCT__ 338 #define __FUNCT__ "MatUseSuperLU_SeqAIJ" 339 int MatUseSuperLU_SeqAIJ(Mat A) 340 { 341 PetscTruth flg; 342 int ierr; 343 344 PetscFunctionBegin; 345 PetscValidHeaderSpecific(A,MAT_COOKIE); 346 ierr = PetscTypeCompare((PetscObject)A,MATSUPERLU,&flg); 347 if (!flg) PetscFunctionReturn(0); 348 349 A->ops->lufactorsymbolic = MatLUFactorSymbolic_SeqAIJ_SuperLU; 350 351 PetscFunctionReturn(0); 352 } 353 354 /* used by -sles_view */ 355 #undef __FUNCT__ 356 #define __FUNCT__ "MatSeqAIJFactorInfo_SuperLU" 357 int MatSeqAIJFactorInfo_SuperLU(Mat A,PetscViewer viewer) 358 { 359 Mat_SeqAIJ_SuperLU *lu= (Mat_SeqAIJ_SuperLU*)A->spptr; 360 int ierr; 361 PetscFunctionBegin; 362 /* check if matrix is SuperLU type */ 363 if (A->ops->solve != MatSolve_SeqAIJ_SuperLU) PetscFunctionReturn(0); 364 365 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU run parameters:\n");CHKERRQ(ierr); 366 if(lu->SuperluMatOdering) ierr = PetscViewerASCIIPrintf(viewer," SuperLU mat ordering: %d\n",lu->ispec);CHKERRQ(ierr); 367 368 PetscFunctionReturn(0); 369 } 370 371 EXTERN_C_BEGIN 372 #undef __FUNCT__ 373 #define __FUNCT__ "MatCreate_SeqAIJ_SuperLU" 374 int MatCreate_SeqAIJ_SuperLU(Mat A) { 375 int ierr; 376 Mat_SeqAIJ_SuperLU *lu; 377 378 PetscFunctionBegin; 379 ierr = MatSetType(A,MATSEQAIJ);CHKERRQ(ierr); 380 ierr = MatUseSuperLU_SeqAIJ(A);CHKERRQ(ierr); 381 382 ierr = PetscNew(Mat_SeqAIJ_SuperLU,&lu);CHKERRQ(ierr); 383 lu->MatView = A->ops->view; 384 lu->MatAssemblyEnd = A->ops->assemblyend; 385 lu->MatDestroy = A->ops->destroy; 386 lu->CleanUpSuperLU = PETSC_FALSE; 387 A->spptr = (void*)lu; 388 A->ops->view = MatView_SeqAIJ_SuperLU; 389 A->ops->assemblyend = MatAssemblyEnd_SeqAIJ_SuperLU; 390 A->ops->destroy = MatDestroy_SeqAIJ_SuperLU; 391 PetscFunctionReturn(0); 392 } 393 EXTERN_C_END 394