1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef") 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #define CASTDOUBLECOMPLEX (doublecomplex *) 13 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **) 14 #include <superlu_zdefs.h> 15 #define LUstructInit zLUstructInit 16 #define ScalePermstructInit zScalePermstructInit 17 #define ScalePermstructFree zScalePermstructFree 18 #define LUstructFree zLUstructFree 19 #define Destroy_LU zDestroy_LU 20 #define ScalePermstruct_t zScalePermstruct_t 21 #define LUstruct_t zLUstruct_t 22 #define SOLVEstruct_t zSOLVEstruct_t 23 #define SolveFinalize zSolveFinalize 24 #define pGetDiagU pzGetDiagU 25 #define pgssvx pzgssvx 26 #define allocateA_dist zallocateA_dist 27 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 28 #define SLU SLU_Z 29 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 30 #define DeAllocLlu_3d zDeAllocLlu_3d 31 #define DeAllocGlu_3d zDeAllocGlu_3d 32 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 33 #define pgssvx3d pzgssvx3d 34 #endif 35 #elif defined(PETSC_USE_REAL_SINGLE) 36 #define CASTDOUBLECOMPLEX 37 #define CASTDOUBLECOMPLEXSTAR 38 #include <superlu_sdefs.h> 39 #define LUstructInit sLUstructInit 40 #define ScalePermstructInit sScalePermstructInit 41 #define ScalePermstructFree sScalePermstructFree 42 #define LUstructFree sLUstructFree 43 #define Destroy_LU sDestroy_LU 44 #define ScalePermstruct_t sScalePermstruct_t 45 #define LUstruct_t sLUstruct_t 46 #define SOLVEstruct_t sSOLVEstruct_t 47 #define SolveFinalize sSolveFinalize 48 #define pGetDiagU psGetDiagU 49 #define pgssvx psgssvx 50 #define allocateA_dist sallocateA_dist 51 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 52 #define SLU SLU_S 53 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 54 #define DeAllocLlu_3d sDeAllocLlu_3d 55 #define DeAllocGlu_3d sDeAllocGlu_3d 56 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 57 #define pgssvx3d psgssvx3d 58 #endif 59 #else 60 #define CASTDOUBLECOMPLEX 61 #define CASTDOUBLECOMPLEXSTAR 62 #include <superlu_ddefs.h> 63 #define LUstructInit dLUstructInit 64 #define ScalePermstructInit dScalePermstructInit 65 #define ScalePermstructFree dScalePermstructFree 66 #define LUstructFree dLUstructFree 67 #define Destroy_LU dDestroy_LU 68 #define ScalePermstruct_t dScalePermstruct_t 69 #define LUstruct_t dLUstruct_t 70 #define SOLVEstruct_t dSOLVEstruct_t 71 #define SolveFinalize dSolveFinalize 72 #define pGetDiagU pdGetDiagU 73 #define pgssvx pdgssvx 74 #define allocateA_dist dallocateA_dist 75 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 76 #define SLU SLU_D 77 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 78 #define DeAllocLlu_3d dDeAllocLlu_3d 79 #define DeAllocGlu_3d dDeAllocGlu_3d 80 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 81 #define pgssvx3d pdgssvx3d 82 #endif 83 #endif 84 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 85 #include <superlu_sdefs.h> 86 #endif 87 EXTERN_C_END 88 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END() 89 90 typedef struct { 91 int_t nprow, npcol, *row, *col; 92 gridinfo_t grid; 93 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 94 PetscBool use3d; 95 int_t npdep; /* replication factor, must be power of two */ 96 gridinfo3d_t grid3d; 97 #endif 98 superlu_dist_options_t options; 99 SuperMatrix A_sup; 100 ScalePermstruct_t ScalePermstruct; 101 LUstruct_t LUstruct; 102 int StatPrint; 103 SOLVEstruct_t SOLVEstruct; 104 fact_t FactPattern; 105 MPI_Comm comm_superlu; 106 PetscScalar *val; 107 PetscBool matsolve_iscalled, matmatsolve_iscalled; 108 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 109 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 110 sScalePermstruct_t sScalePermstruct; 111 sLUstruct_t sLUstruct; 112 sSOLVEstruct_t sSOLVEstruct; 113 float *sval; 114 PetscBool singleprecision; /* use single precision SuperLU_DIST from double precision PETSc */ 115 float *sbptr; 116 #endif 117 } Mat_SuperLU_DIST; 118 119 static PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU) 120 { 121 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 122 123 PetscFunctionBegin; 124 PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU)); 125 PetscFunctionReturn(PETSC_SUCCESS); 126 } 127 128 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU) 129 { 130 PetscFunctionBegin; 131 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 132 PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU)); 133 PetscFunctionReturn(PETSC_SUCCESS); 134 } 135 136 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 137 typedef struct { 138 MPI_Comm comm; 139 PetscBool busy; 140 gridinfo_t grid; 141 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 142 PetscBool use3d; 143 gridinfo3d_t grid3d; 144 #endif 145 } PetscSuperLU_DIST; 146 147 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 148 149 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_DeleteFn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state) 150 { 151 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val; 152 153 PetscFunctionBegin; 154 PetscCheckReturnMPI(keyval == Petsc_Superlu_dist_keyval, PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval"); 155 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 156 if (context->use3d) { 157 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d)); 158 } else 159 #endif 160 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid)); 161 PetscCallMPIReturnMPI(MPI_Comm_free(&context->comm)); 162 PetscCallReturnMPI(PetscFree(context)); 163 PetscFunctionReturn(MPI_SUCCESS); 164 } 165 166 /* 167 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 168 users who do not destroy all PETSc objects before PetscFinalize(). 169 170 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_DeleteFn() 171 can still check that the keyval associated with the MPI communicator is correct when the MPI 172 communicator is destroyed. 173 174 This is called in PetscFinalize() 175 */ 176 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 177 { 178 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 179 180 PetscFunctionBegin; 181 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 182 PetscFunctionReturn(PETSC_SUCCESS); 183 } 184 185 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 186 { 187 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 188 189 PetscFunctionBegin; 190 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 191 PetscCall(PetscFree(lu->sbptr)); 192 #endif 193 if (lu->CleanUpSuperLU_Dist) { 194 /* Deallocate SuperLU_DIST storage */ 195 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 196 if (lu->options.SolveInitialized) { 197 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 198 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 199 else 200 #endif 201 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 202 } 203 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 204 if (lu->use3d) { 205 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 206 if (lu->singleprecision) { 207 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct)); 208 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", sDestroy_A3d_gathered_on_2d(&lu->sSOLVEstruct, &lu->grid3d)); 209 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct)); 210 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct)); 211 } else 212 #endif 213 { 214 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 215 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 216 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 217 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 218 } 219 } else 220 #endif 221 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 222 if (lu->singleprecision) { 223 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid, &lu->sLUstruct)); 224 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", sScalePermstructFree(&lu->sScalePermstruct)); 225 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", sLUstructFree(&lu->sLUstruct)); 226 } else 227 #endif 228 { 229 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 230 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 231 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 232 } 233 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 234 if (lu->comm_superlu) { 235 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 236 if (lu->use3d) { 237 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d)); 238 } else 239 #endif 240 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid)); 241 } 242 } 243 /* 244 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 245 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 246 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 247 * is off, and the communicator was not released or marked as "not busy " in the old code. 248 * Here we try to release comm regardless. 249 */ 250 if (lu->comm_superlu) { 251 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 252 } else { 253 PetscSuperLU_DIST *context; 254 MPI_Comm comm; 255 PetscMPIInt flg; 256 257 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 258 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg)); 259 if (flg) context->busy = PETSC_FALSE; 260 } 261 262 PetscCall(PetscFree(A->data)); 263 /* clear composed functions */ 264 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 265 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL)); 266 PetscFunctionReturn(PETSC_SUCCESS); 267 } 268 269 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x) 270 { 271 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 272 PetscInt m = A->rmap->n; 273 SuperLUStat_t stat; 274 PetscReal berr[1]; 275 PetscScalar *bptr = NULL; 276 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 277 float sberr[1]; 278 #endif 279 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 280 static PetscBool cite = PETSC_FALSE; 281 282 PetscFunctionBegin; 283 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 284 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM " 285 "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n", 286 &cite)); 287 288 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 289 /* see comments in MatMatSolve() */ 290 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 291 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 292 else 293 #endif 294 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 295 lu->options.SolveInitialized = NO; 296 } 297 PetscCall(VecCopy(b_mpi, x)); 298 PetscCall(VecGetArray(x, &bptr)); 299 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 300 if (lu->singleprecision) { 301 PetscInt n; 302 PetscCall(VecGetLocalSize(x, &n)); 303 if (!lu->sbptr) PetscCall(PetscMalloc1(n, &lu->sbptr)); 304 for (PetscInt i = 0; i < n; i++) lu->sbptr[i] = PetscRealPart(bptr[i]); /* PetscRealPart() is a no-op to allow compiling with complex */ 305 } 306 #endif 307 308 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 309 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 310 if (lu->use3d) { 311 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 312 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info)); 313 else 314 #endif 315 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 316 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx3d fails, info: %d", info); 317 } else 318 #endif 319 { 320 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 321 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, lu->sbptr, m, 1, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &info)); 322 else 323 #endif 324 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 325 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pgssvx fails, info: %d", info); 326 } 327 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 328 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 329 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 330 if (lu->singleprecision) { 331 PetscInt n; 332 PetscCall(VecGetLocalSize(x, &n)); 333 for (PetscInt i = 0; i < n; i++) bptr[i] = lu->sbptr[i]; 334 } 335 #endif 336 PetscCall(VecRestoreArray(x, &bptr)); 337 lu->matsolve_iscalled = PETSC_TRUE; 338 lu->matmatsolve_iscalled = PETSC_FALSE; 339 PetscFunctionReturn(PETSC_SUCCESS); 340 } 341 342 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X) 343 { 344 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 345 PetscInt m = A->rmap->n, nrhs; 346 SuperLUStat_t stat; 347 PetscReal berr[1]; 348 PetscScalar *bptr; 349 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 350 PetscBool flg; 351 352 PetscFunctionBegin; 353 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 354 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for -pc_precision single"); 355 #endif 356 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 357 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 358 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 359 if (X != B_mpi) { 360 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 361 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 362 } 363 364 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 365 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 366 thus destroy it and create a new SOLVEstruct. 367 Otherwise it may result in memory corruption or incorrect solution 368 See src/mat/tests/ex125.c */ 369 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 370 lu->options.SolveInitialized = NO; 371 } 372 if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN)); 373 374 PetscCall(MatGetSize(B_mpi, NULL, &nrhs)); 375 376 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 377 PetscCall(MatDenseGetArray(X, &bptr)); 378 379 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 380 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 381 else 382 #endif 383 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 384 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 385 PetscCall(MatDenseRestoreArray(X, &bptr)); 386 387 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 388 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 389 lu->matsolve_iscalled = PETSC_FALSE; 390 lu->matmatsolve_iscalled = PETSC_TRUE; 391 PetscFunctionReturn(PETSC_SUCCESS); 392 } 393 394 /* 395 input: 396 F: numeric Cholesky factor 397 output: 398 nneg: total number of negative pivots 399 nzero: total number of zero pivots 400 npos: (global dimension of F) - nneg - nzero 401 */ 402 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 403 { 404 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 405 PetscScalar *diagU = NULL; 406 PetscInt M, i, neg = 0, zero = 0, pos = 0; 407 PetscReal r; 408 409 PetscFunctionBegin; 410 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 411 PetscCheck(!lu->singleprecision, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Not for -pc_precision single"); 412 #endif 413 PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled"); 414 PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM"); 415 PetscCall(MatGetSize(F, &M, NULL)); 416 PetscCall(PetscMalloc1(M, &diagU)); 417 PetscCall(MatSuperluDistGetDiagU(F, diagU)); 418 for (i = 0; i < M; i++) { 419 #if defined(PETSC_USE_COMPLEX) 420 r = PetscImaginaryPart(diagU[i]) / 10.0; 421 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0)); 422 r = PetscRealPart(diagU[i]); 423 #else 424 r = diagU[i]; 425 #endif 426 if (r > 0) { 427 pos++; 428 } else if (r < 0) { 429 neg++; 430 } else zero++; 431 } 432 433 PetscCall(PetscFree(diagU)); 434 if (nneg) *nneg = neg; 435 if (nzero) *nzero = zero; 436 if (npos) *npos = pos; 437 PetscFunctionReturn(PETSC_SUCCESS); 438 } 439 440 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info) 441 { 442 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 443 Mat Aloc; 444 const PetscScalar *av; 445 const PetscInt *ai = NULL, *aj = NULL; 446 PetscInt nz, dummy; 447 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 448 SuperLUStat_t stat; 449 PetscReal *berr = 0; 450 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 451 float *sberr = 0; 452 #endif 453 PetscBool ismpiaij, isseqaij, flg; 454 455 PetscFunctionBegin; 456 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 457 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij)); 458 if (ismpiaij) { 459 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc)); 460 } else if (isseqaij) { 461 PetscCall(PetscObjectReference((PetscObject)A)); 462 Aloc = A; 463 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 464 465 PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 466 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed"); 467 PetscCall(MatSeqAIJGetArrayRead(Aloc, &av)); 468 nz = ai[Aloc->rmap->n]; 469 470 /* Allocations for A_sup */ 471 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 472 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 473 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row)); 474 else 475 #endif 476 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 477 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 478 if (lu->FactPattern == SamePattern_SameRowPerm) { 479 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 480 } else if (lu->FactPattern == SamePattern) { 481 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 482 if (lu->use3d) { 483 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 484 if (lu->singleprecision) { 485 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->sLUstruct)); 486 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", sSolveFinalize(&lu->options, &lu->sSOLVEstruct)); 487 } else 488 #endif 489 { 490 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 491 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 492 } 493 } else 494 #endif 495 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 496 if (lu->singleprecision) 497 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct)); 498 else 499 #endif 500 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 501 lu->options.Fact = SamePattern; 502 } else if (lu->FactPattern == DOFACT) { 503 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 504 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 505 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", sDestroy_LU(A->rmap->N, &lu->grid, &lu->sLUstruct)); 506 else 507 #endif 508 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 509 lu->options.Fact = DOFACT; 510 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 511 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", sallocateA_dist(Aloc->rmap->n, nz, &lu->sval, &lu->col, &lu->row)); 512 else 513 #endif 514 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 515 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 516 } 517 518 /* Copy AIJ matrix to superlu_dist matrix */ 519 PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1)); 520 PetscCall(PetscArraycpy(lu->col, aj, nz)); 521 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 522 if (lu->singleprecision) 523 for (PetscInt i = 0; i < nz; i++) lu->sval[i] = PetscRealPart(av[i]); /* PetscRealPart() is a no-op to allow compiling with complex */ 524 else 525 #endif 526 PetscCall(PetscArraycpy(lu->val, av, nz)); 527 PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 528 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed"); 529 PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av)); 530 PetscCall(MatDestroy(&Aloc)); 531 532 /* Create and setup A_sup */ 533 if (lu->options.Fact == DOFACT) { 534 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 535 if (lu->singleprecision) 536 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", sCreate_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, lu->sval, lu->col, lu->row, SLU_NR_loc, SLU_S, SLU_GE)); 537 else 538 #endif 539 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 540 } 541 542 /* Factor the matrix. */ 543 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 544 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 545 if (lu->use3d) { 546 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 547 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", psgssvx3d(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo)); 548 else 549 #endif 550 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 551 } else 552 #endif 553 { 554 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 555 if (lu->singleprecision) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", psgssvx(&lu->options, &lu->A_sup, &lu->sScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->sLUstruct, &lu->sSOLVEstruct, sberr, &stat, &sinfo)); 556 else 557 #endif 558 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 559 } 560 if (sinfo > 0) { 561 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo); 562 if (sinfo <= lu->A_sup.ncol) { 563 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 564 PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo)); 565 } else if (sinfo > lu->A_sup.ncol) { 566 /* 567 number of bytes allocated when memory allocation 568 failure occurred, plus A->ncol. 569 */ 570 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 571 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo)); 572 } 573 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 574 575 if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ } 576 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 577 F->assembled = PETSC_TRUE; 578 F->preallocated = PETSC_TRUE; 579 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 580 PetscFunctionReturn(PETSC_SUCCESS); 581 } 582 583 /* Note the Petsc r and c permutations are ignored */ 584 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 585 { 586 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 587 PetscInt M = A->rmap->N, N = A->cmap->N, indx; 588 PetscMPIInt size, mpiflg; 589 PetscBool flg, set; 590 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"}; 591 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"}; 592 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"}; 593 MPI_Comm comm; 594 PetscSuperLU_DIST *context = NULL; 595 596 PetscFunctionBegin; 597 /* Set options to F */ 598 PetscCall(PetscObjectGetComm((PetscObject)F, &comm)); 599 PetscCallMPI(MPI_Comm_size(comm, &size)); 600 601 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat"); 602 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 603 if (set && !flg) lu->options.Equil = NO; 604 605 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg)); 606 if (flg) { 607 switch (indx) { 608 case 0: 609 lu->options.RowPerm = NOROWPERM; 610 break; 611 case 1: 612 lu->options.RowPerm = LargeDiag_MC64; 613 break; 614 case 2: 615 lu->options.RowPerm = LargeDiag_AWPM; 616 break; 617 case 3: 618 lu->options.RowPerm = MY_PERMR; 619 break; 620 default: 621 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation"); 622 } 623 } 624 625 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg)); 626 if (flg) { 627 switch (indx) { 628 case 0: 629 lu->options.ColPerm = NATURAL; 630 break; 631 case 1: 632 lu->options.ColPerm = MMD_AT_PLUS_A; 633 break; 634 case 2: 635 lu->options.ColPerm = MMD_ATA; 636 break; 637 case 3: 638 lu->options.ColPerm = METIS_AT_PLUS_A; 639 break; 640 case 4: 641 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 642 break; 643 default: 644 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 645 } 646 } 647 648 lu->options.ReplaceTinyPivot = NO; 649 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 650 if (set && flg) lu->options.ReplaceTinyPivot = YES; 651 652 lu->options.ParSymbFact = NO; 653 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set)); 654 if (set && flg && size > 1) { 655 #if defined(PETSC_HAVE_PARMETIS) 656 lu->options.ParSymbFact = YES; 657 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 658 #else 659 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS"); 660 #endif 661 } 662 663 lu->FactPattern = SamePattern; 664 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg)); 665 if (flg) { 666 switch (indx) { 667 case 0: 668 lu->FactPattern = SamePattern; 669 break; 670 case 1: 671 lu->FactPattern = SamePattern_SameRowPerm; 672 break; 673 case 2: 674 lu->FactPattern = DOFACT; 675 break; 676 } 677 } 678 679 lu->options.IterRefine = NOREFINE; 680 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set)); 681 if (set) { 682 if (flg) lu->options.IterRefine = SLU_DOUBLE; 683 else lu->options.IterRefine = NOREFINE; 684 } 685 686 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 687 else lu->options.PrintStat = NO; 688 PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL)); 689 PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL)); 690 691 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg)); 692 if (!mpiflg || context->busy) { /* additional options */ 693 if (!mpiflg) { 694 PetscCall(PetscNew(&context)); 695 context->busy = PETSC_TRUE; 696 PetscCallMPI(MPI_Comm_dup(comm, &context->comm)); 697 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context)); 698 } else { 699 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 700 } 701 702 /* Default number of process columns and rows */ 703 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size)); 704 if (!lu->nprow) lu->nprow = 1; 705 while (lu->nprow > 0) { 706 lu->npcol = (int_t)(size / lu->nprow); 707 if (size == lu->nprow * lu->npcol) break; 708 lu->nprow--; 709 } 710 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 711 lu->use3d = PETSC_FALSE; 712 lu->npdep = 1; 713 #endif 714 715 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 716 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL)); 717 if (lu->use3d) { 718 PetscInt t; 719 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL)); 720 t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep); 721 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep); 722 if (lu->npdep > 1) { 723 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep))); 724 if (!lu->nprow) lu->nprow = 1; 725 while (lu->nprow > 0) { 726 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow)); 727 if (size == lu->nprow * lu->npcol * lu->npdep) break; 728 lu->nprow--; 729 } 730 } 731 } 732 #endif 733 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL)); 734 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL)); 735 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 736 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep); 737 #else 738 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol); 739 #endif 740 /* end of adding additional options */ 741 742 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 743 if (lu->use3d) { 744 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d)); 745 if (context) { 746 context->grid3d = lu->grid3d; 747 context->use3d = lu->use3d; 748 } 749 } else { 750 #endif 751 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 752 if (context) context->grid = lu->grid; 753 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 754 } 755 #endif 756 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 757 if (mpiflg) { 758 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 759 } else { 760 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 761 } 762 } else { /* (mpiflg && !context->busy) */ 763 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.\n")); 764 context->busy = PETSC_TRUE; 765 lu->grid = context->grid; 766 } 767 PetscOptionsEnd(); 768 769 /* Initialize ScalePermstruct and LUstruct. */ 770 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 771 if (lu->singleprecision) { 772 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", sScalePermstructInit(M, N, &lu->sScalePermstruct)); 773 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", sLUstructInit(N, &lu->sLUstruct)); 774 } else 775 #endif 776 { 777 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct)); 778 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct)); 779 } 780 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 781 F->ops->solve = MatSolve_SuperLU_DIST; 782 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 783 F->ops->getinertia = NULL; 784 785 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 786 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) 791 { 792 PetscFunctionBegin; 793 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info)); 794 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 795 PetscFunctionReturn(PETSC_SUCCESS); 796 } 797 798 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) 799 { 800 PetscFunctionBegin; 801 *type = MATSOLVERSUPERLU_DIST; 802 PetscFunctionReturn(PETSC_SUCCESS); 803 } 804 805 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) 806 { 807 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 808 superlu_dist_options_t options; 809 810 PetscFunctionBegin; 811 /* check if matrix is superlu_dist type */ 812 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS); 813 814 options = lu->options; 815 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n")); 816 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 817 * format spec for int64_t is set to %d for whatever reason */ 818 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 819 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(9, 0, 0) 820 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep)); 821 #endif 822 823 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO])); 824 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO])); 825 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE])); 826 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 827 828 switch (options.RowPerm) { 829 case NOROWPERM: 830 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n")); 831 break; 832 case LargeDiag_MC64: 833 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n")); 834 break; 835 case LargeDiag_AWPM: 836 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n")); 837 break; 838 case MY_PERMR: 839 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n")); 840 break; 841 default: 842 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 843 } 844 845 switch (options.ColPerm) { 846 case NATURAL: 847 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n")); 848 break; 849 case MMD_AT_PLUS_A: 850 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n")); 851 break; 852 case MMD_ATA: 853 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n")); 854 break; 855 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 856 case METIS_AT_PLUS_A: 857 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n")); 858 break; 859 case PARMETIS: 860 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n")); 861 break; 862 default: 863 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 864 } 865 866 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO])); 867 868 if (lu->FactPattern == SamePattern) { 869 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n")); 870 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 871 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n")); 872 } else if (lu->FactPattern == DOFACT) { 873 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n")); 874 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern"); 875 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 876 if (lu->singleprecision) PetscCall(PetscViewerASCIIPrintf(viewer, " Using SuperLU_DIST in single precision\n")); 877 #endif 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) 882 { 883 PetscBool iascii; 884 PetscViewerFormat format; 885 886 PetscFunctionBegin; 887 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 888 if (iascii) { 889 PetscCall(PetscViewerGetFormat(viewer, &format)); 890 if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer)); 891 } 892 PetscFunctionReturn(PETSC_SUCCESS); 893 } 894 895 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F) 896 { 897 Mat B; 898 Mat_SuperLU_DIST *lu; 899 PetscInt M = A->rmap->N, N = A->cmap->N; 900 PetscMPIInt size; 901 superlu_dist_options_t options; 902 PetscBool flg; 903 char string[16]; 904 905 PetscFunctionBegin; 906 /* Create the factorization matrix */ 907 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 908 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N)); 909 PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name)); 910 PetscCall(MatSetUp(B)); 911 B->ops->getinfo = MatGetInfo_External; 912 B->ops->view = MatView_SuperLU_DIST; 913 B->ops->destroy = MatDestroy_SuperLU_DIST; 914 915 /* Set the default input options: 916 options.Fact = DOFACT; 917 options.Equil = YES; 918 options.ParSymbFact = NO; 919 options.ColPerm = METIS_AT_PLUS_A; 920 options.RowPerm = LargeDiag_MC64; 921 options.ReplaceTinyPivot = YES; 922 options.IterRefine = DOUBLE; 923 options.Trans = NOTRANS; 924 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 925 options.RefineInitialized = NO; 926 options.PrintStat = YES; 927 options.SymPattern = NO; 928 */ 929 set_default_options_dist(&options); 930 931 B->trivialsymbolic = PETSC_TRUE; 932 if (ftype == MAT_FACTOR_LU) { 933 B->factortype = MAT_FACTOR_LU; 934 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 935 } else { 936 B->factortype = MAT_FACTOR_CHOLESKY; 937 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 938 options.SymPattern = YES; 939 } 940 941 /* set solvertype */ 942 PetscCall(PetscFree(B->solvertype)); 943 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype)); 944 945 PetscCall(PetscNew(&lu)); 946 B->data = lu; 947 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 948 949 lu->options = options; 950 lu->options.Fact = DOFACT; 951 lu->matsolve_iscalled = PETSC_FALSE; 952 lu->matmatsolve_iscalled = PETSC_FALSE; 953 954 PetscCall(PetscOptionsGetString(NULL, NULL, "-pc_precision", string, sizeof(string), &flg)); 955 if (flg) { 956 PetscCall(PetscStrcasecmp(string, "single", &flg)); 957 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_USER_INPUT, "-pc_precision only accepts single as option for SuperLU_DIST"); 958 #if defined(PETSC_HAVE_SUPERLU_DIST_SINGLE) 959 lu->singleprecision = PETSC_TRUE; 960 #endif 961 } 962 963 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist)); 964 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST)); 965 966 *F = B; 967 PetscFunctionReturn(PETSC_SUCCESS); 968 } 969 970 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 971 { 972 PetscFunctionBegin; 973 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 974 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 975 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 976 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 977 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 978 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_DeleteFn, &Petsc_Superlu_dist_keyval, NULL)); 979 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 980 } 981 PetscFunctionReturn(PETSC_SUCCESS); 982 } 983 984 /*MC 985 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 986 987 Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST 988 989 Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver 990 991 Works with `MATAIJ` matrices 992 993 Options Database Keys: 994 + -mat_superlu_dist_r <n> - number of rows in processor partition 995 . -mat_superlu_dist_c <n> - number of columns in processor partition 996 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 997 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided 998 . -mat_superlu_dist_equil - equilibrate the matrix 999 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 1000 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 1001 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 1002 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT` 1003 . -mat_superlu_dist_iterrefine - use iterative refinement 1004 . -mat_superlu_dist_printstat - print factorization information 1005 - -pc_precision single - use SuperLU_DIST single precision with PETSc double precision. Currently this does not accept an options prefix, so 1006 regardless of the `PC` prefix you must use no prefix here 1007 1008 Level: beginner 1009 1010 Note: 1011 If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs. 1012 1013 .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()` 1014 M*/ 1015