1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 EXTERN_C_BEGIN 10 #if defined(PETSC_USE_COMPLEX) 11 #define CASTDOUBLECOMPLEX (doublecomplex *) 12 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **) 13 #include <superlu_zdefs.h> 14 #define LUstructInit zLUstructInit 15 #define ScalePermstructInit zScalePermstructInit 16 #define ScalePermstructFree zScalePermstructFree 17 #define LUstructFree zLUstructFree 18 #define Destroy_LU zDestroy_LU 19 #define ScalePermstruct_t zScalePermstruct_t 20 #define LUstruct_t zLUstruct_t 21 #define SOLVEstruct_t zSOLVEstruct_t 22 #define SolveFinalize zSolveFinalize 23 #define pGetDiagU pzGetDiagU 24 #define pgssvx pzgssvx 25 #define allocateA_dist zallocateA_dist 26 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 27 #define SLU SLU_Z 28 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 29 #define DeAllocLlu_3d zDeAllocLlu_3d 30 #define DeAllocGlu_3d zDeAllocGlu_3d 31 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 32 #define pgssvx3d pzgssvx3d 33 #endif 34 #elif defined(PETSC_USE_REAL_SINGLE) 35 #define CASTDOUBLECOMPLEX 36 #define CASTDOUBLECOMPLEXSTAR 37 #include <superlu_sdefs.h> 38 #define LUstructInit sLUstructInit 39 #define ScalePermstructInit sScalePermstructInit 40 #define ScalePermstructFree sScalePermstructFree 41 #define LUstructFree sLUstructFree 42 #define Destroy_LU sDestroy_LU 43 #define ScalePermstruct_t sScalePermstruct_t 44 #define LUstruct_t sLUstruct_t 45 #define SOLVEstruct_t sSOLVEstruct_t 46 #define SolveFinalize sSolveFinalize 47 #define pGetDiagU psGetDiagU 48 #define pgssvx psgssvx 49 #define allocateA_dist sallocateA_dist 50 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 51 #define SLU SLU_S 52 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 53 #define DeAllocLlu_3d sDeAllocLlu_3d 54 #define DeAllocGlu_3d sDeAllocGlu_3d 55 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 56 #define pgssvx3d psgssvx3d 57 #endif 58 #else 59 #define CASTDOUBLECOMPLEX 60 #define CASTDOUBLECOMPLEXSTAR 61 #include <superlu_ddefs.h> 62 #define LUstructInit dLUstructInit 63 #define ScalePermstructInit dScalePermstructInit 64 #define ScalePermstructFree dScalePermstructFree 65 #define LUstructFree dLUstructFree 66 #define Destroy_LU dDestroy_LU 67 #define ScalePermstruct_t dScalePermstruct_t 68 #define LUstruct_t dLUstruct_t 69 #define SOLVEstruct_t dSOLVEstruct_t 70 #define SolveFinalize dSolveFinalize 71 #define pGetDiagU pdGetDiagU 72 #define pgssvx pdgssvx 73 #define allocateA_dist dallocateA_dist 74 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 75 #define SLU SLU_D 76 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 77 #define DeAllocLlu_3d dDeAllocLlu_3d 78 #define DeAllocGlu_3d dDeAllocGlu_3d 79 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 80 #define pgssvx3d pdgssvx3d 81 #endif 82 #endif 83 EXTERN_C_END 84 85 typedef struct { 86 int_t nprow, npcol, *row, *col; 87 gridinfo_t grid; 88 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 89 PetscBool use3d; 90 int_t npdep; /* replication factor, must be power of two */ 91 gridinfo3d_t grid3d; 92 #endif 93 superlu_dist_options_t options; 94 SuperMatrix A_sup; 95 ScalePermstruct_t ScalePermstruct; 96 LUstruct_t LUstruct; 97 int StatPrint; 98 SOLVEstruct_t SOLVEstruct; 99 fact_t FactPattern; 100 MPI_Comm comm_superlu; 101 PetscScalar *val; 102 PetscBool matsolve_iscalled, matmatsolve_iscalled; 103 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 104 } Mat_SuperLU_DIST; 105 106 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU) 107 { 108 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 109 110 PetscFunctionBegin; 111 PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU)); 112 PetscFunctionReturn(PETSC_SUCCESS); 113 } 114 115 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU) 116 { 117 PetscFunctionBegin; 118 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 119 PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 124 typedef struct { 125 MPI_Comm comm; 126 PetscBool busy; 127 gridinfo_t grid; 128 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 129 PetscBool use3d; 130 gridinfo3d_t grid3d; 131 #endif 132 } PetscSuperLU_DIST; 133 134 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 135 136 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state) 137 { 138 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val; 139 140 PetscFunctionBegin; 141 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval"); 142 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 143 if (context->use3d) { 144 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d)); 145 } else 146 #endif 147 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid)); 148 PetscCallMPI(MPI_Comm_free(&context->comm)); 149 PetscCall(PetscFree(context)); 150 PetscFunctionReturn(MPI_SUCCESS); 151 } 152 153 /* 154 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 155 users who do not destroy all PETSc objects before PetscFinalize(). 156 157 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn() 158 can still check that the keyval associated with the MPI communicator is correct when the MPI 159 communicator is destroyed. 160 161 This is called in PetscFinalize() 162 */ 163 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 164 { 165 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 166 167 PetscFunctionBegin; 168 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 169 PetscFunctionReturn(PETSC_SUCCESS); 170 } 171 172 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 173 { 174 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 175 176 PetscFunctionBegin; 177 if (lu->CleanUpSuperLU_Dist) { 178 /* Deallocate SuperLU_DIST storage */ 179 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 180 if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 181 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 182 if (lu->use3d) { 183 if (lu->grid3d.zscp.Iam == 0) { 184 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 185 } else { 186 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 187 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 188 } 189 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 190 } else 191 #endif 192 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 193 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 194 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 195 196 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 197 if (lu->comm_superlu) { 198 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 199 if (lu->use3d) { 200 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d)); 201 } else 202 #endif 203 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid)); 204 } 205 } 206 /* 207 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 208 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 209 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 210 * is off, and the communicator was not released or marked as "not busy " in the old code. 211 * Here we try to release comm regardless. 212 */ 213 if (lu->comm_superlu) { 214 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 215 } else { 216 PetscSuperLU_DIST *context; 217 MPI_Comm comm; 218 PetscMPIInt flg; 219 220 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 221 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg)); 222 if (flg) context->busy = PETSC_FALSE; 223 } 224 225 PetscCall(PetscFree(A->data)); 226 /* clear composed functions */ 227 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 228 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL)); 229 230 PetscFunctionReturn(PETSC_SUCCESS); 231 } 232 233 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x) 234 { 235 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 236 PetscInt m = A->rmap->n; 237 SuperLUStat_t stat; 238 PetscReal berr[1]; 239 PetscScalar *bptr = NULL; 240 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 241 static PetscBool cite = PETSC_FALSE; 242 243 PetscFunctionBegin; 244 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 245 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM " 246 "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n", 247 &cite)); 248 249 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 250 /* see comments in MatMatSolve() */ 251 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 252 lu->options.SolveInitialized = NO; 253 } 254 PetscCall(VecCopy(b_mpi, x)); 255 PetscCall(VecGetArray(x, &bptr)); 256 257 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 258 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 259 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 260 else 261 #endif 262 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 263 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 264 265 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 266 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 267 268 PetscCall(VecRestoreArray(x, &bptr)); 269 lu->matsolve_iscalled = PETSC_TRUE; 270 lu->matmatsolve_iscalled = PETSC_FALSE; 271 PetscFunctionReturn(PETSC_SUCCESS); 272 } 273 274 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X) 275 { 276 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 277 PetscInt m = A->rmap->n, nrhs; 278 SuperLUStat_t stat; 279 PetscReal berr[1]; 280 PetscScalar *bptr; 281 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 282 PetscBool flg; 283 284 PetscFunctionBegin; 285 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 286 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 287 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 288 if (X != B_mpi) { 289 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 290 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 291 } 292 293 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 294 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 295 thus destroy it and create a new SOLVEstruct. 296 Otherwise it may result in memory corruption or incorrect solution 297 See src/mat/tests/ex125.c */ 298 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 299 lu->options.SolveInitialized = NO; 300 } 301 if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN)); 302 303 PetscCall(MatGetSize(B_mpi, NULL, &nrhs)); 304 305 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 306 PetscCall(MatDenseGetArray(X, &bptr)); 307 308 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 309 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 310 else 311 #endif 312 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 313 314 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 315 PetscCall(MatDenseRestoreArray(X, &bptr)); 316 317 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 318 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 319 lu->matsolve_iscalled = PETSC_FALSE; 320 lu->matmatsolve_iscalled = PETSC_TRUE; 321 PetscFunctionReturn(PETSC_SUCCESS); 322 } 323 324 /* 325 input: 326 F: numeric Cholesky factor 327 output: 328 nneg: total number of negative pivots 329 nzero: total number of zero pivots 330 npos: (global dimension of F) - nneg - nzero 331 */ 332 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 333 { 334 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 335 PetscScalar *diagU = NULL; 336 PetscInt M, i, neg = 0, zero = 0, pos = 0; 337 PetscReal r; 338 339 PetscFunctionBegin; 340 PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled"); 341 PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM"); 342 PetscCall(MatGetSize(F, &M, NULL)); 343 PetscCall(PetscMalloc1(M, &diagU)); 344 PetscCall(MatSuperluDistGetDiagU(F, diagU)); 345 for (i = 0; i < M; i++) { 346 #if defined(PETSC_USE_COMPLEX) 347 r = PetscImaginaryPart(diagU[i]) / 10.0; 348 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0)); 349 r = PetscRealPart(diagU[i]); 350 #else 351 r = diagU[i]; 352 #endif 353 if (r > 0) { 354 pos++; 355 } else if (r < 0) { 356 neg++; 357 } else zero++; 358 } 359 360 PetscCall(PetscFree(diagU)); 361 if (nneg) *nneg = neg; 362 if (nzero) *nzero = zero; 363 if (npos) *npos = pos; 364 PetscFunctionReturn(PETSC_SUCCESS); 365 } 366 367 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info) 368 { 369 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 370 Mat Aloc; 371 const PetscScalar *av; 372 const PetscInt *ai = NULL, *aj = NULL; 373 PetscInt nz, dummy; 374 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 375 SuperLUStat_t stat; 376 PetscReal *berr = 0; 377 PetscBool ismpiaij, isseqaij, flg; 378 379 PetscFunctionBegin; 380 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 381 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij)); 382 if (ismpiaij) { 383 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc)); 384 } else if (isseqaij) { 385 PetscCall(PetscObjectReference((PetscObject)A)); 386 Aloc = A; 387 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 388 389 PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 390 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed"); 391 PetscCall(MatSeqAIJGetArrayRead(Aloc, &av)); 392 nz = ai[Aloc->rmap->n]; 393 394 /* Allocations for A_sup */ 395 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 396 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 397 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 398 if (lu->FactPattern == SamePattern_SameRowPerm) { 399 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 400 } else if (lu->FactPattern == SamePattern) { 401 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 402 if (lu->use3d) { 403 if (lu->grid3d.zscp.Iam == 0) { 404 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 405 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 406 } else { 407 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 408 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 409 } 410 } else 411 #endif 412 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 413 lu->options.Fact = SamePattern; 414 } else if (lu->FactPattern == DOFACT) { 415 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 416 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 417 lu->options.Fact = DOFACT; 418 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 419 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 420 } 421 422 /* Copy AIJ matrix to superlu_dist matrix */ 423 PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1)); 424 PetscCall(PetscArraycpy(lu->col, aj, nz)); 425 PetscCall(PetscArraycpy(lu->val, av, nz)); 426 PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 427 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed"); 428 PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av)); 429 PetscCall(MatDestroy(&Aloc)); 430 431 /* Create and setup A_sup */ 432 if (lu->options.Fact == DOFACT) { 433 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 434 } 435 436 /* Factor the matrix. */ 437 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 438 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 439 if (lu->use3d) { 440 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 441 } else 442 #endif 443 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 444 if (sinfo > 0) { 445 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo); 446 if (sinfo <= lu->A_sup.ncol) { 447 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 448 PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo)); 449 } else if (sinfo > lu->A_sup.ncol) { 450 /* 451 number of bytes allocated when memory allocation 452 failure occurred, plus A->ncol. 453 */ 454 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 455 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo)); 456 } 457 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 458 459 if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ } 460 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 461 F->assembled = PETSC_TRUE; 462 F->preallocated = PETSC_TRUE; 463 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 464 PetscFunctionReturn(PETSC_SUCCESS); 465 } 466 467 /* Note the Petsc r and c permutations are ignored */ 468 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 469 { 470 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 471 PetscInt M = A->rmap->N, N = A->cmap->N, indx; 472 PetscMPIInt size, mpiflg; 473 PetscBool flg, set; 474 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"}; 475 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"}; 476 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"}; 477 MPI_Comm comm; 478 PetscSuperLU_DIST *context = NULL; 479 480 PetscFunctionBegin; 481 /* Set options to F */ 482 PetscCall(PetscObjectGetComm((PetscObject)F, &comm)); 483 PetscCallMPI(MPI_Comm_size(comm, &size)); 484 485 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat"); 486 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 487 if (set && !flg) lu->options.Equil = NO; 488 489 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg)); 490 if (flg) { 491 switch (indx) { 492 case 0: 493 lu->options.RowPerm = NOROWPERM; 494 break; 495 case 1: 496 lu->options.RowPerm = LargeDiag_MC64; 497 break; 498 case 2: 499 lu->options.RowPerm = LargeDiag_AWPM; 500 break; 501 case 3: 502 lu->options.RowPerm = MY_PERMR; 503 break; 504 default: 505 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation"); 506 } 507 } 508 509 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg)); 510 if (flg) { 511 switch (indx) { 512 case 0: 513 lu->options.ColPerm = NATURAL; 514 break; 515 case 1: 516 lu->options.ColPerm = MMD_AT_PLUS_A; 517 break; 518 case 2: 519 lu->options.ColPerm = MMD_ATA; 520 break; 521 case 3: 522 lu->options.ColPerm = METIS_AT_PLUS_A; 523 break; 524 case 4: 525 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 526 break; 527 default: 528 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 529 } 530 } 531 532 lu->options.ReplaceTinyPivot = NO; 533 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 534 if (set && flg) lu->options.ReplaceTinyPivot = YES; 535 536 lu->options.ParSymbFact = NO; 537 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set)); 538 if (set && flg && size > 1) { 539 #if defined(PETSC_HAVE_PARMETIS) 540 lu->options.ParSymbFact = YES; 541 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 542 #else 543 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS"); 544 #endif 545 } 546 547 lu->FactPattern = SamePattern; 548 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg)); 549 if (flg) { 550 switch (indx) { 551 case 0: 552 lu->FactPattern = SamePattern; 553 break; 554 case 1: 555 lu->FactPattern = SamePattern_SameRowPerm; 556 break; 557 case 2: 558 lu->FactPattern = DOFACT; 559 break; 560 } 561 } 562 563 lu->options.IterRefine = NOREFINE; 564 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set)); 565 if (set) { 566 if (flg) lu->options.IterRefine = SLU_DOUBLE; 567 else lu->options.IterRefine = NOREFINE; 568 } 569 570 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 571 else lu->options.PrintStat = NO; 572 PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL)); 573 PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL)); 574 575 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg)); 576 if (!mpiflg || context->busy) { /* additional options */ 577 if (!mpiflg) { 578 PetscCall(PetscNew(&context)); 579 context->busy = PETSC_TRUE; 580 PetscCallMPI(MPI_Comm_dup(comm, &context->comm)); 581 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context)); 582 } else { 583 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 584 } 585 586 /* Default number of process columns and rows */ 587 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size)); 588 if (!lu->nprow) lu->nprow = 1; 589 while (lu->nprow > 0) { 590 lu->npcol = (int_t)(size / lu->nprow); 591 if (size == lu->nprow * lu->npcol) break; 592 lu->nprow--; 593 } 594 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 595 lu->use3d = PETSC_FALSE; 596 lu->npdep = 1; 597 #endif 598 599 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 600 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL)); 601 PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation"); 602 if (lu->use3d) { 603 PetscInt t; 604 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL)); 605 t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep); 606 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep); 607 if (lu->npdep > 1) { 608 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep))); 609 if (!lu->nprow) lu->nprow = 1; 610 while (lu->nprow > 0) { 611 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow)); 612 if (size == lu->nprow * lu->npcol * lu->npdep) break; 613 lu->nprow--; 614 } 615 } 616 } 617 #endif 618 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL)); 619 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL)); 620 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 621 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep); 622 #else 623 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol); 624 #endif 625 /* end of adding additional options */ 626 627 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 628 if (lu->use3d) { 629 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d)); 630 if (context) { 631 context->grid3d = lu->grid3d; 632 context->use3d = lu->use3d; 633 } 634 } else { 635 #endif 636 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 637 if (context) context->grid = lu->grid; 638 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 639 } 640 #endif 641 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 642 if (mpiflg) { 643 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 644 } else { 645 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 646 } 647 } else { /* (mpiflg && !context->busy) */ 648 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.")); 649 context->busy = PETSC_TRUE; 650 lu->grid = context->grid; 651 } 652 PetscOptionsEnd(); 653 654 /* Initialize ScalePermstruct and LUstruct. */ 655 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct)); 656 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct)); 657 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 658 F->ops->solve = MatSolve_SuperLU_DIST; 659 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 660 F->ops->getinertia = NULL; 661 662 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 663 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 664 PetscFunctionReturn(PETSC_SUCCESS); 665 } 666 667 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) 668 { 669 PetscFunctionBegin; 670 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info)); 671 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 672 PetscFunctionReturn(PETSC_SUCCESS); 673 } 674 675 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) 676 { 677 PetscFunctionBegin; 678 *type = MATSOLVERSUPERLU_DIST; 679 PetscFunctionReturn(PETSC_SUCCESS); 680 } 681 682 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) 683 { 684 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 685 superlu_dist_options_t options; 686 687 PetscFunctionBegin; 688 /* check if matrix is superlu_dist type */ 689 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS); 690 691 options = lu->options; 692 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n")); 693 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 694 * format spec for int64_t is set to %d for whatever reason */ 695 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 696 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 697 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep)); 698 #endif 699 700 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO])); 701 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO])); 702 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE])); 703 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 704 705 switch (options.RowPerm) { 706 case NOROWPERM: 707 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n")); 708 break; 709 case LargeDiag_MC64: 710 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n")); 711 break; 712 case LargeDiag_AWPM: 713 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n")); 714 break; 715 case MY_PERMR: 716 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n")); 717 break; 718 default: 719 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 720 } 721 722 switch (options.ColPerm) { 723 case NATURAL: 724 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n")); 725 break; 726 case MMD_AT_PLUS_A: 727 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n")); 728 break; 729 case MMD_ATA: 730 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n")); 731 break; 732 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 733 case METIS_AT_PLUS_A: 734 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n")); 735 break; 736 case PARMETIS: 737 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n")); 738 break; 739 default: 740 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 741 } 742 743 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO])); 744 745 if (lu->FactPattern == SamePattern) { 746 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n")); 747 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 748 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n")); 749 } else if (lu->FactPattern == DOFACT) { 750 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n")); 751 } else { 752 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern"); 753 } 754 PetscFunctionReturn(PETSC_SUCCESS); 755 } 756 757 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) 758 { 759 PetscBool iascii; 760 PetscViewerFormat format; 761 762 PetscFunctionBegin; 763 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 764 if (iascii) { 765 PetscCall(PetscViewerGetFormat(viewer, &format)); 766 if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer)); 767 } 768 PetscFunctionReturn(PETSC_SUCCESS); 769 } 770 771 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F) 772 { 773 Mat B; 774 Mat_SuperLU_DIST *lu; 775 PetscInt M = A->rmap->N, N = A->cmap->N; 776 PetscMPIInt size; 777 superlu_dist_options_t options; 778 779 PetscFunctionBegin; 780 /* Create the factorization matrix */ 781 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 782 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N)); 783 PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name)); 784 PetscCall(MatSetUp(B)); 785 B->ops->getinfo = MatGetInfo_External; 786 B->ops->view = MatView_SuperLU_DIST; 787 B->ops->destroy = MatDestroy_SuperLU_DIST; 788 789 /* Set the default input options: 790 options.Fact = DOFACT; 791 options.Equil = YES; 792 options.ParSymbFact = NO; 793 options.ColPerm = METIS_AT_PLUS_A; 794 options.RowPerm = LargeDiag_MC64; 795 options.ReplaceTinyPivot = YES; 796 options.IterRefine = DOUBLE; 797 options.Trans = NOTRANS; 798 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 799 options.RefineInitialized = NO; 800 options.PrintStat = YES; 801 options.SymPattern = NO; 802 */ 803 set_default_options_dist(&options); 804 805 B->trivialsymbolic = PETSC_TRUE; 806 if (ftype == MAT_FACTOR_LU) { 807 B->factortype = MAT_FACTOR_LU; 808 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 809 } else { 810 B->factortype = MAT_FACTOR_CHOLESKY; 811 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 812 options.SymPattern = YES; 813 } 814 815 /* set solvertype */ 816 PetscCall(PetscFree(B->solvertype)); 817 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype)); 818 819 PetscCall(PetscNew(&lu)); 820 B->data = lu; 821 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 822 823 lu->options = options; 824 lu->options.Fact = DOFACT; 825 lu->matsolve_iscalled = PETSC_FALSE; 826 lu->matmatsolve_iscalled = PETSC_FALSE; 827 828 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist)); 829 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST)); 830 831 *F = B; 832 PetscFunctionReturn(PETSC_SUCCESS); 833 } 834 835 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 836 { 837 PetscFunctionBegin; 838 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 839 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 840 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 841 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 842 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 843 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, NULL)); 844 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 845 } 846 PetscFunctionReturn(PETSC_SUCCESS); 847 } 848 849 /*MC 850 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 851 852 Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST 853 854 Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver 855 856 Works with `MATAIJ` matrices 857 858 Options Database Keys: 859 + -mat_superlu_dist_r <n> - number of rows in processor partition 860 . -mat_superlu_dist_c <n> - number of columns in processor partition 861 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 862 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided 863 . -mat_superlu_dist_equil - equilibrate the matrix 864 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 865 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 866 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 867 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT` 868 . -mat_superlu_dist_iterrefine - use iterative refinement 869 - -mat_superlu_dist_printstat - print factorization information 870 871 Level: beginner 872 873 Note: 874 If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs. 875 876 .seealso: [](chapter_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()` 877 M*/ 878