1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 EXTERN_C_BEGIN 10 #if defined(PETSC_USE_COMPLEX) 11 #define CASTDOUBLECOMPLEX (doublecomplex*) 12 #include <superlu_zdefs.h> 13 #define LUstructInit zLUstructInit 14 #define ScalePermstructInit zScalePermstructInit 15 #define ScalePermstructFree zScalePermstructFree 16 #define LUstructFree zLUstructFree 17 #define Destroy_LU zDestroy_LU 18 #define ScalePermstruct_t zScalePermstruct_t 19 #define LUstruct_t zLUstruct_t 20 #define SOLVEstruct_t zSOLVEstruct_t 21 #define SolveFinalize zSolveFinalize 22 #define pgssvx pzgssvx 23 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 24 #define pGetDiagU pzGetDiagU 25 #define allocateA_dist zallocateA_dist 26 #define SLU_ZD SLU_Z 27 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 28 #define DeAllocLlu_3d zDeAllocLlu_3d 29 #define DeAllocGlu_3d zDeAllocGlu_3d 30 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 31 #define pgssvx3d pzgssvx3d 32 #endif 33 #else 34 #define CASTDOUBLECOMPLEX 35 #include <superlu_ddefs.h> 36 #define LUstructInit dLUstructInit 37 #define ScalePermstructInit dScalePermstructInit 38 #define ScalePermstructFree dScalePermstructFree 39 #define LUstructFree dLUstructFree 40 #define Destroy_LU dDestroy_LU 41 #define ScalePermstruct_t dScalePermstruct_t 42 #define LUstruct_t dLUstruct_t 43 #define SOLVEstruct_t dSOLVEstruct_t 44 #define SolveFinalize dSolveFinalize 45 #define pgssvx pdgssvx 46 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 47 #define pGetDiagU pdGetDiagU 48 #define allocateA_dist dallocateA_dist 49 #define SLU_ZD SLU_D 50 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 51 #define DeAllocLlu_3d dDeAllocLlu_3d 52 #define DeAllocGlu_3d dDeAllocGlu_3d 53 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 54 #define pgssvx3d pdgssvx3d 55 #endif 56 #endif 57 EXTERN_C_END 58 59 typedef struct { 60 int_t nprow,npcol,*row,*col; 61 gridinfo_t grid; 62 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 63 PetscBool use3d; 64 int_t npdep; /* replication factor, must be power of two */ 65 gridinfo3d_t grid3d; 66 #endif 67 superlu_dist_options_t options; 68 SuperMatrix A_sup; 69 ScalePermstruct_t ScalePermstruct; 70 LUstruct_t LUstruct; 71 int StatPrint; 72 SOLVEstruct_t SOLVEstruct; 73 fact_t FactPattern; 74 MPI_Comm comm_superlu; 75 #if defined(PETSC_USE_COMPLEX) 76 doublecomplex *val; 77 #else 78 double *val; 79 #endif 80 PetscBool matsolve_iscalled,matmatsolve_iscalled; 81 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 82 } Mat_SuperLU_DIST; 83 84 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F,PetscScalar *diagU) 85 { 86 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 87 88 PetscFunctionBegin; 89 PetscStackCall("SuperLU_DIST:pGetDiagU",pGetDiagU(F->rmap->N,&lu->LUstruct,&lu->grid,CASTDOUBLECOMPLEX diagU)); 90 PetscFunctionReturn(0); 91 } 92 93 PetscErrorCode MatSuperluDistGetDiagU(Mat F,PetscScalar *diagU) 94 { 95 PetscErrorCode ierr; 96 97 PetscFunctionBegin; 98 PetscValidHeaderSpecific(F,MAT_CLASSID,1); 99 ierr = PetscTryMethod(F,"MatSuperluDistGetDiagU_C",(Mat,PetscScalar*),(F,diagU));CHKERRQ(ierr); 100 PetscFunctionReturn(0); 101 } 102 103 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 104 typedef struct { 105 MPI_Comm comm; 106 PetscBool busy; 107 gridinfo_t grid; 108 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 109 PetscBool use3d; 110 gridinfo3d_t grid3d; 111 #endif 112 } PetscSuperLU_DIST; 113 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 114 115 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *attr_val,void *extra_state) 116 { 117 PetscErrorCode ierr; 118 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *) attr_val; 119 120 PetscFunctionBegin; 121 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF,PETSC_ERR_ARG_CORRUPT,"Unexpected keyval"); 122 ierr = PetscInfo(NULL,"Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n");CHKERRQ(ierr); 123 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 124 if (context->use3d) { 125 PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&context->grid3d)); 126 } else 127 #endif 128 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&context->grid)); 129 ierr = MPI_Comm_free(&context->comm);CHKERRMPI(ierr); 130 ierr = PetscFree(context);CHKERRQ(ierr); 131 PetscFunctionReturn(MPI_SUCCESS); 132 } 133 134 /* 135 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 136 users who do not destroy all PETSc objects before PetscFinalize(). 137 138 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn() 139 can still check that the keyval associated with the MPI communicator is correct when the MPI 140 communicator is destroyed. 141 142 This is called in PetscFinalize() 143 */ 144 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 145 { 146 PetscErrorCode ierr; 147 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 148 149 PetscFunctionBegin; 150 ierr = PetscInfo(NULL,"Freeing Petsc_Superlu_dist_keyval\n");CHKERRQ(ierr); 151 ierr = MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp);CHKERRMPI(ierr); 152 PetscFunctionReturn(0); 153 } 154 155 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 156 { 157 PetscErrorCode ierr; 158 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 159 160 PetscFunctionBegin; 161 if (lu->CleanUpSuperLU_Dist) { 162 /* Deallocate SuperLU_DIST storage */ 163 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 164 if (lu->options.SolveInitialized) { 165 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 166 } 167 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 168 if (lu->use3d) { 169 if (lu->grid3d.zscp.Iam == 0) { 170 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 171 } else { 172 PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 173 PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct)); 174 } 175 PetscStackCall("SuperLU_DIST:Destroy_A3d_gathered_on_2d",Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 176 } else 177 #endif 178 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 179 PetscStackCall("SuperLU_DIST:ScalePermstructFree",ScalePermstructFree(&lu->ScalePermstruct)); 180 PetscStackCall("SuperLU_DIST:LUstructFree",LUstructFree(&lu->LUstruct)); 181 182 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 183 if (lu->comm_superlu) { 184 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 185 if (lu->use3d) { 186 PetscStackCall("SuperLU_DIST:superlu_gridexit3d",superlu_gridexit3d(&lu->grid3d)); 187 } else 188 #endif 189 PetscStackCall("SuperLU_DIST:superlu_gridexit",superlu_gridexit(&lu->grid)); 190 } 191 } 192 /* 193 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 194 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 195 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 196 * is off, and the communicator was not released or marked as "not busy " in the old code. 197 * Here we try to release comm regardless. 198 */ 199 if (lu->comm_superlu) { 200 ierr = PetscCommRestoreComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu);CHKERRQ(ierr); 201 } else { 202 PetscSuperLU_DIST *context; 203 MPI_Comm comm; 204 PetscMPIInt flg; 205 206 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 207 ierr = MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg);CHKERRMPI(ierr); 208 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Communicator does not have expected Petsc_Superlu_dist_keyval attribute"); 209 context->busy = PETSC_FALSE; 210 } 211 212 ierr = PetscFree(A->data);CHKERRQ(ierr); 213 /* clear composed functions */ 214 ierr = PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);CHKERRQ(ierr); 215 ierr = PetscObjectComposeFunction((PetscObject)A,"MatSuperluDistGetDiagU_C",NULL);CHKERRQ(ierr); 216 217 PetscFunctionReturn(0); 218 } 219 220 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A,Vec b_mpi,Vec x) 221 { 222 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 223 PetscErrorCode ierr; 224 PetscInt m=A->rmap->n; 225 SuperLUStat_t stat; 226 double berr[1]; 227 PetscScalar *bptr=NULL; 228 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 229 static PetscBool cite = PETSC_FALSE; 230 231 PetscFunctionBegin; 232 PetscCheck(lu->options.Fact == FACTORED,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 233 ierr = PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",&cite);CHKERRQ(ierr); 234 235 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 236 /* see comments in MatMatSolve() */ 237 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 238 lu->options.SolveInitialized = NO; 239 } 240 ierr = VecCopy(b_mpi,x);CHKERRQ(ierr); 241 ierr = VecGetArray(x,&bptr);CHKERRQ(ierr); 242 243 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 244 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 245 if (lu->use3d) 246 PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 247 else 248 #endif 249 PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,1,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 250 PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d",info); 251 252 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 253 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 254 255 ierr = VecRestoreArray(x,&bptr);CHKERRQ(ierr); 256 lu->matsolve_iscalled = PETSC_TRUE; 257 lu->matmatsolve_iscalled = PETSC_FALSE; 258 PetscFunctionReturn(0); 259 } 260 261 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A,Mat B_mpi,Mat X) 262 { 263 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)A->data; 264 PetscErrorCode ierr; 265 PetscInt m=A->rmap->n,nrhs; 266 SuperLUStat_t stat; 267 double berr[1]; 268 PetscScalar *bptr; 269 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 270 PetscBool flg; 271 272 PetscFunctionBegin; 273 PetscCheck(lu->options.Fact == FACTORED,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"SuperLU_DIST options.Fact must equal FACTORED"); 274 ierr = PetscObjectTypeCompareAny((PetscObject)B_mpi,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 275 PetscCheck(flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix B must be MATDENSE matrix"); 276 if (X != B_mpi) { 277 ierr = PetscObjectTypeCompareAny((PetscObject)X,&flg,MATSEQDENSE,MATMPIDENSE,NULL);CHKERRQ(ierr); 278 PetscCheckFalse(!flg,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix X must be MATDENSE matrix"); 279 } 280 281 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 282 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 283 thus destroy it and create a new SOLVEstruct. 284 Otherwise it may result in memory corruption or incorrect solution 285 See src/mat/tests/ex125.c */ 286 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 287 lu->options.SolveInitialized = NO; 288 } 289 if (X != B_mpi) { 290 ierr = MatCopy(B_mpi,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 291 } 292 293 ierr = MatGetSize(B_mpi,NULL,&nrhs);CHKERRQ(ierr); 294 295 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 296 ierr = MatDenseGetArray(X,&bptr);CHKERRQ(ierr); 297 298 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 299 if (lu->use3d) 300 PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid3d,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 301 else 302 #endif 303 PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options,&lu->A_sup,&lu->ScalePermstruct,CASTDOUBLECOMPLEX bptr,m,nrhs,&lu->grid,&lu->LUstruct,&lu->SOLVEstruct,berr,&stat,&info)); 304 305 PetscCheck(!info,PETSC_COMM_SELF,PETSC_ERR_LIB,"pdgssvx fails, info: %d",info); 306 ierr = MatDenseRestoreArray(X,&bptr);CHKERRQ(ierr); 307 308 if (lu->options.PrintStat) PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 309 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 310 lu->matsolve_iscalled = PETSC_FALSE; 311 lu->matmatsolve_iscalled = PETSC_TRUE; 312 PetscFunctionReturn(0); 313 } 314 315 /* 316 input: 317 F: numeric Cholesky factor 318 output: 319 nneg: total number of negative pivots 320 nzero: total number of zero pivots 321 npos: (global dimension of F) - nneg - nzero 322 */ 323 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos) 324 { 325 PetscErrorCode ierr; 326 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 327 PetscScalar *diagU=NULL; 328 PetscInt M,i,neg=0,zero=0,pos=0; 329 PetscReal r; 330 331 PetscFunctionBegin; 332 PetscCheck(F->assembled,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Matrix factor F is not assembled"); 333 PetscCheck(lu->options.RowPerm == NOROWPERM,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must set NOROWPERM"); 334 ierr = MatGetSize(F,&M,NULL);CHKERRQ(ierr); 335 ierr = PetscMalloc1(M,&diagU);CHKERRQ(ierr); 336 ierr = MatSuperluDistGetDiagU(F,diagU);CHKERRQ(ierr); 337 for (i=0; i<M; i++) { 338 #if defined(PETSC_USE_COMPLEX) 339 r = PetscImaginaryPart(diagU[i])/10.0; 340 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"diagU[%" PetscInt_FMT "]=%g + i %g is non-real",i,(double)PetscRealPart(diagU[i]),(double)(r*10.0)); 341 r = PetscRealPart(diagU[i]); 342 #else 343 r = diagU[i]; 344 #endif 345 if (r > 0) { 346 pos++; 347 } else if (r < 0) { 348 neg++; 349 } else zero++; 350 } 351 352 ierr = PetscFree(diagU);CHKERRQ(ierr); 353 if (nneg) *nneg = neg; 354 if (nzero) *nzero = zero; 355 if (npos) *npos = pos; 356 PetscFunctionReturn(0); 357 } 358 359 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F,Mat A,const MatFactorInfo *info) 360 { 361 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 362 Mat Aloc; 363 const PetscScalar *av; 364 const PetscInt *ai=NULL,*aj=NULL; 365 PetscInt nz,dummy; 366 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 367 SuperLUStat_t stat; 368 double *berr=0; 369 PetscBool ismpiaij,isseqaij,flg; 370 PetscErrorCode ierr; 371 372 PetscFunctionBegin; 373 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 374 ierr = PetscObjectBaseTypeCompare((PetscObject)A,MATMPIAIJ,&ismpiaij);CHKERRQ(ierr); 375 if (ismpiaij) { 376 ierr = MatMPIAIJGetLocalMat(A,MAT_INITIAL_MATRIX,&Aloc);CHKERRQ(ierr); 377 } else if (isseqaij) { 378 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 379 Aloc = A; 380 } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Not for type %s",((PetscObject)A)->type_name); 381 382 ierr = MatGetRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr); 383 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"GetRowIJ failed"); 384 ierr = MatSeqAIJGetArrayRead(Aloc,&av);CHKERRQ(ierr); 385 nz = ai[Aloc->rmap->n]; 386 387 /* Allocations for A_sup */ 388 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 389 PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 390 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 391 if (lu->FactPattern == SamePattern_SameRowPerm) { 392 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 393 } else if (lu->FactPattern == SamePattern) { 394 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 395 if (lu->use3d) { 396 if (lu->grid3d.zscp.Iam == 0) { 397 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 398 PetscStackCall("SuperLU_DIST:SolveFinalize",SolveFinalize(&lu->options, &lu->SOLVEstruct)); 399 } else { 400 PetscStackCall("SuperLU_DIST:DeAllocLlu_3d",DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 401 PetscStackCall("SuperLU_DIST:DeAllocGlu_3d",DeAllocGlu_3d(&lu->LUstruct)); 402 } 403 } else 404 #endif 405 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 406 lu->options.Fact = SamePattern; 407 } else if (lu->FactPattern == DOFACT) { 408 PetscStackCall("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist",Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 409 PetscStackCall("SuperLU_DIST:Destroy_LU",Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 410 lu->options.Fact = DOFACT; 411 PetscStackCall("SuperLU_DIST:allocateA_dist",allocateA_dist(Aloc->rmap->n, nz, &lu->val, &lu->col, &lu->row)); 412 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 413 } 414 415 /* Copy AIJ matrix to superlu_dist matrix */ 416 ierr = PetscArraycpy(lu->row,ai,Aloc->rmap->n+1);CHKERRQ(ierr); 417 ierr = PetscArraycpy(lu->col,aj,nz);CHKERRQ(ierr); 418 ierr = PetscArraycpy(lu->val,av,nz);CHKERRQ(ierr); 419 ierr = MatRestoreRowIJ(Aloc,0,PETSC_FALSE,PETSC_FALSE,&dummy,&ai,&aj,&flg);CHKERRQ(ierr); 420 PetscCheck(flg,PETSC_COMM_SELF,PETSC_ERR_SUP,"RestoreRowIJ failed"); 421 ierr = MatSeqAIJRestoreArrayRead(Aloc,&av);CHKERRQ(ierr); 422 ierr = MatDestroy(&Aloc);CHKERRQ(ierr); 423 424 /* Create and setup A_sup */ 425 if (lu->options.Fact == DOFACT) { 426 PetscStackCall("SuperLU_DIST:Create_CompRowLoc_Matrix_dist",Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->val, lu->col, lu->row, SLU_NR_loc, SLU_ZD, SLU_GE)); 427 } 428 429 /* Factor the matrix. */ 430 PetscStackCall("SuperLU_DIST:PStatInit",PStatInit(&stat)); /* Initialize the statistics variables. */ 431 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 432 if (lu->use3d) { 433 PetscStackCall("SuperLU_DIST:pgssvx3d",pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 434 } else 435 #endif 436 PetscStackCall("SuperLU_DIST:pgssvx",pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 437 if (sinfo > 0) { 438 PetscCheck(!A->erroriffailure,PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot in row %d",sinfo); 439 else { 440 if (sinfo <= lu->A_sup.ncol) { 441 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 442 ierr = PetscInfo(F,"U(i,i) is exactly zero, i= %d\n",sinfo);CHKERRQ(ierr); 443 } else if (sinfo > lu->A_sup.ncol) { 444 /* 445 number of bytes allocated when memory allocation 446 failure occurred, plus A->ncol. 447 */ 448 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 449 ierr = PetscInfo(F,"Number of bytes allocated when memory allocation fails %d\n",sinfo);CHKERRQ(ierr); 450 } 451 } 452 } else PetscCheck(sinfo >= 0,PETSC_COMM_SELF,PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 453 454 if (lu->options.PrintStat) { 455 PetscStackCall("SuperLU_DIST:PStatPrint",PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 456 } 457 PetscStackCall("SuperLU_DIST:PStatFree",PStatFree(&stat)); 458 F->assembled = PETSC_TRUE; 459 F->preallocated = PETSC_TRUE; 460 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 461 PetscFunctionReturn(0); 462 } 463 464 /* Note the Petsc r and c permutations are ignored */ 465 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info) 466 { 467 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST*)F->data; 468 PetscInt M = A->rmap->N,N=A->cmap->N; 469 470 PetscFunctionBegin; 471 /* Initialize ScalePermstruct and LUstruct. */ 472 PetscStackCall("SuperLU_DIST:ScalePermstructInit",ScalePermstructInit(M, N, &lu->ScalePermstruct)); 473 PetscStackCall("SuperLU_DIST:LUstructInit",LUstructInit(N, &lu->LUstruct)); 474 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 475 F->ops->solve = MatSolve_SuperLU_DIST; 476 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 477 F->ops->getinertia = NULL; 478 479 if (A->symmetric || A->hermitian) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 480 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 481 PetscFunctionReturn(0); 482 } 483 484 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F,Mat A,IS r,const MatFactorInfo *info) 485 { 486 PetscErrorCode ierr; 487 488 PetscFunctionBegin; 489 ierr = MatLUFactorSymbolic_SuperLU_DIST(F,A,r,r,info);CHKERRQ(ierr); 490 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 491 PetscFunctionReturn(0); 492 } 493 494 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A,MatSolverType *type) 495 { 496 PetscFunctionBegin; 497 *type = MATSOLVERSUPERLU_DIST; 498 PetscFunctionReturn(0); 499 } 500 501 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A,PetscViewer viewer) 502 { 503 Mat_SuperLU_DIST *lu=(Mat_SuperLU_DIST*)A->data; 504 superlu_dist_options_t options; 505 PetscErrorCode ierr; 506 507 PetscFunctionBegin; 508 /* check if matrix is superlu_dist type */ 509 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(0); 510 511 options = lu->options; 512 ierr = PetscViewerASCIIPrintf(viewer,"SuperLU_DIST run parameters:\n");CHKERRQ(ierr); 513 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 514 * format spec for int64_t is set to %d for whatever reason */ 515 ierr = PetscViewerASCIIPrintf(viewer," Process grid nprow %lld x npcol %lld \n",(long long)lu->nprow,(long long)lu->npcol);CHKERRQ(ierr); 516 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 517 if (lu->use3d) { 518 ierr = PetscViewerASCIIPrintf(viewer," Using 3d decomposition with npdep %lld \n",(long long)lu->npdep);CHKERRQ(ierr); 519 } 520 #endif 521 522 ierr = PetscViewerASCIIPrintf(viewer," Equilibrate matrix %s \n",PetscBools[options.Equil != NO]);CHKERRQ(ierr); 523 ierr = PetscViewerASCIIPrintf(viewer," Replace tiny pivots %s \n",PetscBools[options.ReplaceTinyPivot != NO]);CHKERRQ(ierr); 524 ierr = PetscViewerASCIIPrintf(viewer," Use iterative refinement %s \n",PetscBools[options.IterRefine == SLU_DOUBLE]);CHKERRQ(ierr); 525 ierr = PetscViewerASCIIPrintf(viewer," Processors in row %lld col partition %lld \n",(long long)lu->nprow,(long long)lu->npcol);CHKERRQ(ierr); 526 527 switch (options.RowPerm) { 528 case NOROWPERM: 529 ierr = PetscViewerASCIIPrintf(viewer," Row permutation NOROWPERM\n");CHKERRQ(ierr); 530 break; 531 case LargeDiag_MC64: 532 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_MC64\n");CHKERRQ(ierr); 533 break; 534 case LargeDiag_AWPM: 535 ierr = PetscViewerASCIIPrintf(viewer," Row permutation LargeDiag_AWPM\n");CHKERRQ(ierr); 536 break; 537 case MY_PERMR: 538 ierr = PetscViewerASCIIPrintf(viewer," Row permutation MY_PERMR\n");CHKERRQ(ierr); 539 break; 540 default: 541 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 542 } 543 544 switch (options.ColPerm) { 545 case NATURAL: 546 ierr = PetscViewerASCIIPrintf(viewer," Column permutation NATURAL\n");CHKERRQ(ierr); 547 break; 548 case MMD_AT_PLUS_A: 549 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_AT_PLUS_A\n");CHKERRQ(ierr); 550 break; 551 case MMD_ATA: 552 ierr = PetscViewerASCIIPrintf(viewer," Column permutation MMD_ATA\n");CHKERRQ(ierr); 553 break; 554 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 555 case METIS_AT_PLUS_A: 556 ierr = PetscViewerASCIIPrintf(viewer," Column permutation METIS_AT_PLUS_A\n");CHKERRQ(ierr); 557 break; 558 case PARMETIS: 559 ierr = PetscViewerASCIIPrintf(viewer," Column permutation PARMETIS\n");CHKERRQ(ierr); 560 break; 561 default: 562 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 563 } 564 565 ierr = PetscViewerASCIIPrintf(viewer," Parallel symbolic factorization %s \n",PetscBools[options.ParSymbFact != NO]);CHKERRQ(ierr); 566 567 if (lu->FactPattern == SamePattern) { 568 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern\n");CHKERRQ(ierr); 569 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 570 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization SamePattern_SameRowPerm\n");CHKERRQ(ierr); 571 } else if (lu->FactPattern == DOFACT) { 572 ierr = PetscViewerASCIIPrintf(viewer," Repeated factorization DOFACT\n");CHKERRQ(ierr); 573 } else { 574 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown factorization pattern"); 575 } 576 PetscFunctionReturn(0); 577 } 578 579 static PetscErrorCode MatView_SuperLU_DIST(Mat A,PetscViewer viewer) 580 { 581 PetscErrorCode ierr; 582 PetscBool iascii; 583 PetscViewerFormat format; 584 585 PetscFunctionBegin; 586 ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr); 587 if (iascii) { 588 ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr); 589 if (format == PETSC_VIEWER_ASCII_INFO) { 590 ierr = MatView_Info_SuperLU_DIST(A,viewer);CHKERRQ(ierr); 591 } 592 } 593 PetscFunctionReturn(0); 594 } 595 596 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A,MatFactorType ftype,Mat *F) 597 { 598 Mat B; 599 Mat_SuperLU_DIST *lu; 600 PetscErrorCode ierr; 601 PetscInt M=A->rmap->N,N=A->cmap->N,indx; 602 PetscMPIInt size; 603 superlu_dist_options_t options; 604 PetscBool flg; 605 const char *colperm[] = {"NATURAL","MMD_AT_PLUS_A","MMD_ATA","METIS_AT_PLUS_A","PARMETIS"}; 606 const char *rowperm[] = {"NOROWPERM","LargeDiag_MC64","LargeDiag_AWPM","MY_PERMR"}; 607 const char *factPattern[] = {"SamePattern","SamePattern_SameRowPerm","DOFACT"}; 608 PetscBool set; 609 610 PetscFunctionBegin; 611 /* Create the factorization matrix */ 612 ierr = MatCreate(PetscObjectComm((PetscObject)A),&B);CHKERRQ(ierr); 613 ierr = MatSetSizes(B,A->rmap->n,A->cmap->n,M,N);CHKERRQ(ierr); 614 ierr = PetscStrallocpy("superlu_dist",&((PetscObject)B)->type_name);CHKERRQ(ierr); 615 ierr = MatSetUp(B);CHKERRQ(ierr); 616 B->ops->getinfo = MatGetInfo_External; 617 B->ops->view = MatView_SuperLU_DIST; 618 B->ops->destroy = MatDestroy_SuperLU_DIST; 619 620 /* Set the default input options: 621 options.Fact = DOFACT; 622 options.Equil = YES; 623 options.ParSymbFact = NO; 624 options.ColPerm = METIS_AT_PLUS_A; 625 options.RowPerm = LargeDiag_MC64; 626 options.ReplaceTinyPivot = YES; 627 options.IterRefine = DOUBLE; 628 options.Trans = NOTRANS; 629 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 630 options.RefineInitialized = NO; 631 options.PrintStat = YES; 632 options.SymPattern = NO; 633 */ 634 set_default_options_dist(&options); 635 636 B->trivialsymbolic = PETSC_TRUE; 637 if (ftype == MAT_FACTOR_LU) { 638 B->factortype = MAT_FACTOR_LU; 639 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 640 } else { 641 B->factortype = MAT_FACTOR_CHOLESKY; 642 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 643 options.SymPattern = YES; 644 } 645 646 /* set solvertype */ 647 ierr = PetscFree(B->solvertype);CHKERRQ(ierr); 648 ierr = PetscStrallocpy(MATSOLVERSUPERLU_DIST,&B->solvertype);CHKERRQ(ierr); 649 650 ierr = PetscNewLog(B,&lu);CHKERRQ(ierr); 651 B->data = lu; 652 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr); 653 654 { 655 PetscMPIInt flg; 656 MPI_Comm comm; 657 PetscSuperLU_DIST *context = NULL; 658 659 ierr = PetscObjectGetComm((PetscObject)A,&comm);CHKERRQ(ierr); 660 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 661 ierr = MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_Superlu_dist_keyval_Delete_Fn,&Petsc_Superlu_dist_keyval,(void*)0);CHKERRMPI(ierr); 662 ierr = PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free);CHKERRQ(ierr); 663 } 664 ierr = MPI_Comm_get_attr(comm,Petsc_Superlu_dist_keyval,&context,&flg);CHKERRMPI(ierr); 665 if (!flg || context->busy) { 666 if (!flg) { 667 ierr = PetscNew(&context);CHKERRQ(ierr); 668 context->busy = PETSC_TRUE; 669 ierr = MPI_Comm_dup(comm,&context->comm);CHKERRMPI(ierr); 670 ierr = MPI_Comm_set_attr(comm,Petsc_Superlu_dist_keyval,context);CHKERRMPI(ierr); 671 } else { 672 ierr = PetscCommGetComm(PetscObjectComm((PetscObject)A),&lu->comm_superlu);CHKERRQ(ierr); 673 } 674 675 /* Default number of process columns and rows */ 676 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)size)); 677 if (!lu->nprow) lu->nprow = 1; 678 while (lu->nprow > 0) { 679 lu->npcol = (int_t) (size/lu->nprow); 680 if (size == lu->nprow * lu->npcol) break; 681 lu->nprow--; 682 } 683 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 684 lu->use3d = PETSC_FALSE; 685 lu->npdep = 1; 686 #endif 687 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 688 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 689 ierr = PetscOptionsBool("-mat_superlu_dist_3d","Use SuperLU_DIST 3D distribution","None",lu->use3d,&lu->use3d,NULL);CHKERRQ(ierr); 690 if (lu->use3d) { 691 PetscInt t; 692 ierr = PetscOptionsInt("-mat_superlu_dist_d","Number of z entries in processor partition","None",lu->npdep,(PetscInt*)&lu->npdep,NULL);CHKERRQ(ierr); 693 t = (PetscInt) PetscLog2Real((PetscReal)lu->npdep); 694 PetscCheck(PetscPowInt(2,t) == lu->npdep,PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_OUTOFRANGE,"-mat_superlu_dist_d %lld must be a power of 2",(long long)lu->npdep); 695 if (lu->npdep > 1) { 696 lu->nprow = (int_t) (0.5 + PetscSqrtReal((PetscReal)(size/lu->npdep))); 697 if (!lu->nprow) lu->nprow = 1; 698 while (lu->nprow > 0) { 699 lu->npcol = (int_t) (size/(lu->npdep*lu->nprow)); 700 if (size == lu->nprow * lu->npcol * lu->npdep) break; 701 lu->nprow--; 702 } 703 } 704 } 705 #endif 706 ierr = PetscOptionsInt("-mat_superlu_dist_r","Number rows in processor partition","None",lu->nprow,(PetscInt*)&lu->nprow,NULL);CHKERRQ(ierr); 707 ierr = PetscOptionsInt("-mat_superlu_dist_c","Number columns in processor partition","None",lu->npcol,(PetscInt*)&lu->npcol,NULL);CHKERRQ(ierr); 708 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 709 PetscCheck(size == lu->nprow*lu->npcol*lu->npdep,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld",size,(long long)lu->nprow,(long long)lu->npcol,(long long)lu->npdep); 710 #else 711 PetscCheck(size == lu->nprow*lu->npcol,PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of processes %d must equal to nprow %lld * npcol %lld",size,(long long)lu->nprow,(long long)lu->npcol); 712 #endif 713 ierr = PetscOptionsEnd();CHKERRQ(ierr); 714 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 715 if (lu->use3d) { 716 PetscStackCall("SuperLU_DIST:superlu_gridinit3d",superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol,lu->npdep, &lu->grid3d)); 717 if (context) {context->grid3d = lu->grid3d; context->use3d = lu->use3d;} 718 } else { 719 #endif 720 PetscStackCall("SuperLU_DIST:superlu_gridinit",superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 721 if (context) context->grid = lu->grid; 722 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7,2,0) 723 } 724 #endif 725 ierr = PetscInfo(NULL,"Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n");CHKERRQ(ierr); 726 if (flg) { 727 ierr = PetscInfo(NULL,"Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n");CHKERRQ(ierr); 728 } else { 729 ierr = PetscInfo(NULL,"Storing communicator and SuperLU_DIST grid in communicator attribute\n");CHKERRQ(ierr); 730 } 731 } else { 732 ierr = PetscInfo(NULL,"Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.");CHKERRQ(ierr); 733 context->busy = PETSC_TRUE; 734 lu->grid = context->grid; 735 } 736 } 737 738 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"SuperLU_Dist Options","Mat");CHKERRQ(ierr); 739 ierr = PetscOptionsBool("-mat_superlu_dist_equil","Equilibrate matrix","None",options.Equil ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 740 if (set && !flg) options.Equil = NO; 741 742 ierr = PetscOptionsEList("-mat_superlu_dist_rowperm","Row permutation","None",rowperm,4,rowperm[1],&indx,&flg);CHKERRQ(ierr); 743 if (flg) { 744 switch (indx) { 745 case 0: 746 options.RowPerm = NOROWPERM; 747 break; 748 case 1: 749 options.RowPerm = LargeDiag_MC64; 750 break; 751 case 2: 752 options.RowPerm = LargeDiag_AWPM; 753 break; 754 case 3: 755 options.RowPerm = MY_PERMR; 756 break; 757 default: 758 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown row permutation"); 759 } 760 } 761 762 ierr = PetscOptionsEList("-mat_superlu_dist_colperm","Column permutation","None",colperm,5,colperm[3],&indx,&flg);CHKERRQ(ierr); 763 if (flg) { 764 switch (indx) { 765 case 0: 766 options.ColPerm = NATURAL; 767 break; 768 case 1: 769 options.ColPerm = MMD_AT_PLUS_A; 770 break; 771 case 2: 772 options.ColPerm = MMD_ATA; 773 break; 774 case 3: 775 options.ColPerm = METIS_AT_PLUS_A; 776 break; 777 case 4: 778 options.ColPerm = PARMETIS; /* only works for np>1 */ 779 break; 780 default: 781 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown column permutation"); 782 } 783 } 784 785 options.ReplaceTinyPivot = NO; 786 ierr = PetscOptionsBool("-mat_superlu_dist_replacetinypivot","Replace tiny pivots","None",options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 787 if (set && flg) options.ReplaceTinyPivot = YES; 788 789 options.ParSymbFact = NO; 790 ierr = PetscOptionsBool("-mat_superlu_dist_parsymbfact","Parallel symbolic factorization","None",PETSC_FALSE,&flg,&set);CHKERRQ(ierr); 791 if (set && flg && size>1) { 792 #if defined(PETSC_HAVE_PARMETIS) 793 options.ParSymbFact = YES; 794 options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 795 #else 796 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"parsymbfact needs PARMETIS"); 797 #endif 798 } 799 800 lu->FactPattern = SamePattern; 801 ierr = PetscOptionsEList("-mat_superlu_dist_fact","Sparsity pattern for repeated matrix factorization","None",factPattern,3,factPattern[0],&indx,&flg);CHKERRQ(ierr); 802 if (flg) { 803 switch (indx) { 804 case 0: 805 lu->FactPattern = SamePattern; 806 break; 807 case 1: 808 lu->FactPattern = SamePattern_SameRowPerm; 809 break; 810 case 2: 811 lu->FactPattern = DOFACT; 812 break; 813 } 814 } 815 816 options.IterRefine = NOREFINE; 817 ierr = PetscOptionsBool("-mat_superlu_dist_iterrefine","Use iterative refinement","None",options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE ,&flg,&set);CHKERRQ(ierr); 818 if (set) { 819 if (flg) options.IterRefine = SLU_DOUBLE; 820 else options.IterRefine = NOREFINE; 821 } 822 823 if (PetscLogPrintInfo) options.PrintStat = YES; 824 else options.PrintStat = NO; 825 ierr = PetscOptionsBool("-mat_superlu_dist_statprint","Print factorization information","None",(PetscBool)options.PrintStat,(PetscBool*)&options.PrintStat,NULL);CHKERRQ(ierr); 826 ierr = PetscOptionsEnd();CHKERRQ(ierr); 827 828 lu->options = options; 829 lu->options.Fact = DOFACT; 830 lu->matsolve_iscalled = PETSC_FALSE; 831 lu->matmatsolve_iscalled = PETSC_FALSE; 832 833 ierr = PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_aij_superlu_dist);CHKERRQ(ierr); 834 ierr = PetscObjectComposeFunction((PetscObject)B,"MatSuperluDistGetDiagU_C",MatSuperluDistGetDiagU_SuperLU_DIST);CHKERRQ(ierr); 835 836 *F = B; 837 PetscFunctionReturn(0); 838 } 839 840 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 841 { 842 PetscErrorCode ierr; 843 PetscFunctionBegin; 844 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 845 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_LU,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 846 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATMPIAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 847 ierr = MatSolverTypeRegister(MATSOLVERSUPERLU_DIST,MATSEQAIJ,MAT_FACTOR_CHOLESKY,MatGetFactor_aij_superlu_dist);CHKERRQ(ierr); 848 PetscFunctionReturn(0); 849 } 850 851 /*MC 852 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 853 854 Use ./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch to have PETSc installed with SuperLU_DIST 855 856 Use -pc_type lu -pc_factor_mat_solver_type superlu_dist to use this direct solver 857 858 Works with AIJ matrices 859 860 Options Database Keys: 861 + -mat_superlu_dist_r <n> - number of rows in processor partition 862 . -mat_superlu_dist_c <n> - number of columns in processor partition 863 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 864 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if -mat_superlu_dist_3d) is provided 865 . -mat_superlu_dist_equil - equilibrate the matrix 866 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 867 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 868 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 869 . -mat_superlu_dist_fact <SamePattern> - (choose one of) SamePattern SamePattern_SameRowPerm DOFACT 870 . -mat_superlu_dist_iterrefine - use iterative refinement 871 - -mat_superlu_dist_statprint - print factorization information 872 873 Notes: 874 If PETSc was configured with --with-cuda than this solver will automatically use the GPUs. 875 876 Level: beginner 877 878 .seealso: PCLU 879 880 .seealso: PCFactorSetMatSolverType(), MatSolverType 881 882 M*/ 883