1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef") 10 EXTERN_C_BEGIN 11 #if defined(PETSC_USE_COMPLEX) 12 #define CASTDOUBLECOMPLEX (doublecomplex *) 13 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **) 14 #include <superlu_zdefs.h> 15 #define LUstructInit zLUstructInit 16 #define ScalePermstructInit zScalePermstructInit 17 #define ScalePermstructFree zScalePermstructFree 18 #define LUstructFree zLUstructFree 19 #define Destroy_LU zDestroy_LU 20 #define ScalePermstruct_t zScalePermstruct_t 21 #define LUstruct_t zLUstruct_t 22 #define SOLVEstruct_t zSOLVEstruct_t 23 #define SolveFinalize zSolveFinalize 24 #define pGetDiagU pzGetDiagU 25 #define pgssvx pzgssvx 26 #define allocateA_dist zallocateA_dist 27 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 28 #define SLU SLU_Z 29 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 30 #define DeAllocLlu_3d zDeAllocLlu_3d 31 #define DeAllocGlu_3d zDeAllocGlu_3d 32 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 33 #define pgssvx3d pzgssvx3d 34 #endif 35 #elif defined(PETSC_USE_REAL_SINGLE) 36 #define CASTDOUBLECOMPLEX 37 #define CASTDOUBLECOMPLEXSTAR 38 #include <superlu_sdefs.h> 39 #define LUstructInit sLUstructInit 40 #define ScalePermstructInit sScalePermstructInit 41 #define ScalePermstructFree sScalePermstructFree 42 #define LUstructFree sLUstructFree 43 #define Destroy_LU sDestroy_LU 44 #define ScalePermstruct_t sScalePermstruct_t 45 #define LUstruct_t sLUstruct_t 46 #define SOLVEstruct_t sSOLVEstruct_t 47 #define SolveFinalize sSolveFinalize 48 #define pGetDiagU psGetDiagU 49 #define pgssvx psgssvx 50 #define allocateA_dist sallocateA_dist 51 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 52 #define SLU SLU_S 53 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 54 #define DeAllocLlu_3d sDeAllocLlu_3d 55 #define DeAllocGlu_3d sDeAllocGlu_3d 56 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 57 #define pgssvx3d psgssvx3d 58 #endif 59 #else 60 #define CASTDOUBLECOMPLEX 61 #define CASTDOUBLECOMPLEXSTAR 62 #include <superlu_ddefs.h> 63 #define LUstructInit dLUstructInit 64 #define ScalePermstructInit dScalePermstructInit 65 #define ScalePermstructFree dScalePermstructFree 66 #define LUstructFree dLUstructFree 67 #define Destroy_LU dDestroy_LU 68 #define ScalePermstruct_t dScalePermstruct_t 69 #define LUstruct_t dLUstruct_t 70 #define SOLVEstruct_t dSOLVEstruct_t 71 #define SolveFinalize dSolveFinalize 72 #define pGetDiagU pdGetDiagU 73 #define pgssvx pdgssvx 74 #define allocateA_dist dallocateA_dist 75 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 76 #define SLU SLU_D 77 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 78 #define DeAllocLlu_3d dDeAllocLlu_3d 79 #define DeAllocGlu_3d dDeAllocGlu_3d 80 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 81 #define pgssvx3d pdgssvx3d 82 #endif 83 #endif 84 EXTERN_C_END 85 PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END() 86 87 typedef struct { 88 int_t nprow, npcol, *row, *col; 89 gridinfo_t grid; 90 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 91 PetscBool use3d; 92 int_t npdep; /* replication factor, must be power of two */ 93 gridinfo3d_t grid3d; 94 #endif 95 superlu_dist_options_t options; 96 SuperMatrix A_sup; 97 ScalePermstruct_t ScalePermstruct; 98 LUstruct_t LUstruct; 99 int StatPrint; 100 SOLVEstruct_t SOLVEstruct; 101 fact_t FactPattern; 102 MPI_Comm comm_superlu; 103 PetscScalar *val; 104 PetscBool matsolve_iscalled, matmatsolve_iscalled; 105 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 106 } Mat_SuperLU_DIST; 107 108 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU) 109 { 110 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 111 112 PetscFunctionBegin; 113 PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU) 118 { 119 PetscFunctionBegin; 120 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 121 PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU)); 122 PetscFunctionReturn(PETSC_SUCCESS); 123 } 124 125 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 126 typedef struct { 127 MPI_Comm comm; 128 PetscBool busy; 129 gridinfo_t grid; 130 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 131 PetscBool use3d; 132 gridinfo3d_t grid3d; 133 #endif 134 } PetscSuperLU_DIST; 135 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 136 137 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state) 138 { 139 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val; 140 141 PetscFunctionBegin; 142 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval"); 143 PetscCall(PetscInfo(NULL, "Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n")); 144 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 145 if (context->use3d) { 146 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d)); 147 } else 148 #endif 149 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid)); 150 PetscCallMPI(MPI_Comm_free(&context->comm)); 151 PetscCall(PetscFree(context)); 152 PetscFunctionReturn(MPI_SUCCESS); 153 } 154 155 /* 156 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 157 users who do not destroy all PETSc objects before PetscFinalize(). 158 159 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn() 160 can still check that the keyval associated with the MPI communicator is correct when the MPI 161 communicator is destroyed. 162 163 This is called in PetscFinalize() 164 */ 165 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 166 { 167 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 168 169 PetscFunctionBegin; 170 PetscCall(PetscInfo(NULL, "Freeing Petsc_Superlu_dist_keyval\n")); 171 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 172 PetscFunctionReturn(PETSC_SUCCESS); 173 } 174 175 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 176 { 177 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 178 179 PetscFunctionBegin; 180 if (lu->CleanUpSuperLU_Dist) { 181 /* Deallocate SuperLU_DIST storage */ 182 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 183 if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 184 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 185 if (lu->use3d) { 186 if (lu->grid3d.zscp.Iam == 0) { 187 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 188 } else { 189 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 190 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 191 } 192 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 193 } else 194 #endif 195 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 196 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 197 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 198 199 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 200 if (lu->comm_superlu) { 201 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 202 if (lu->use3d) { 203 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d)); 204 } else 205 #endif 206 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid)); 207 } 208 } 209 /* 210 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 211 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 212 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 213 * is off, and the communicator was not released or marked as "not busy " in the old code. 214 * Here we try to release comm regardless. 215 */ 216 if (lu->comm_superlu) { 217 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 218 } else { 219 PetscSuperLU_DIST *context; 220 MPI_Comm comm; 221 PetscMPIInt flg; 222 223 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 224 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg)); 225 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Communicator does not have expected Petsc_Superlu_dist_keyval attribute"); 226 context->busy = PETSC_FALSE; 227 } 228 229 PetscCall(PetscFree(A->data)); 230 /* clear composed functions */ 231 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 232 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL)); 233 234 PetscFunctionReturn(PETSC_SUCCESS); 235 } 236 237 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x) 238 { 239 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 240 PetscInt m = A->rmap->n; 241 SuperLUStat_t stat; 242 PetscReal berr[1]; 243 PetscScalar *bptr = NULL; 244 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 245 static PetscBool cite = PETSC_FALSE; 246 247 PetscFunctionBegin; 248 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 249 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM " 250 "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n", 251 &cite)); 252 253 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 254 /* see comments in MatMatSolve() */ 255 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 256 lu->options.SolveInitialized = NO; 257 } 258 PetscCall(VecCopy(b_mpi, x)); 259 PetscCall(VecGetArray(x, &bptr)); 260 261 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 262 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 263 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 264 else 265 #endif 266 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 267 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 268 269 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 270 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 271 272 PetscCall(VecRestoreArray(x, &bptr)); 273 lu->matsolve_iscalled = PETSC_TRUE; 274 lu->matmatsolve_iscalled = PETSC_FALSE; 275 PetscFunctionReturn(PETSC_SUCCESS); 276 } 277 278 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X) 279 { 280 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 281 PetscInt m = A->rmap->n, nrhs; 282 SuperLUStat_t stat; 283 PetscReal berr[1]; 284 PetscScalar *bptr; 285 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 286 PetscBool flg; 287 288 PetscFunctionBegin; 289 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 290 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 291 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 292 if (X != B_mpi) { 293 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 294 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 295 } 296 297 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 298 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 299 thus destroy it and create a new SOLVEstruct. 300 Otherwise it may result in memory corruption or incorrect solution 301 See src/mat/tests/ex125.c */ 302 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 303 lu->options.SolveInitialized = NO; 304 } 305 if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN)); 306 307 PetscCall(MatGetSize(B_mpi, NULL, &nrhs)); 308 309 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 310 PetscCall(MatDenseGetArray(X, &bptr)); 311 312 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 313 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 314 else 315 #endif 316 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 317 318 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 319 PetscCall(MatDenseRestoreArray(X, &bptr)); 320 321 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 322 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 323 lu->matsolve_iscalled = PETSC_FALSE; 324 lu->matmatsolve_iscalled = PETSC_TRUE; 325 PetscFunctionReturn(PETSC_SUCCESS); 326 } 327 328 /* 329 input: 330 F: numeric Cholesky factor 331 output: 332 nneg: total number of negative pivots 333 nzero: total number of zero pivots 334 npos: (global dimension of F) - nneg - nzero 335 */ 336 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 337 { 338 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 339 PetscScalar *diagU = NULL; 340 PetscInt M, i, neg = 0, zero = 0, pos = 0; 341 PetscReal r; 342 343 PetscFunctionBegin; 344 PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled"); 345 PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM"); 346 PetscCall(MatGetSize(F, &M, NULL)); 347 PetscCall(PetscMalloc1(M, &diagU)); 348 PetscCall(MatSuperluDistGetDiagU(F, diagU)); 349 for (i = 0; i < M; i++) { 350 #if defined(PETSC_USE_COMPLEX) 351 r = PetscImaginaryPart(diagU[i]) / 10.0; 352 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0)); 353 r = PetscRealPart(diagU[i]); 354 #else 355 r = diagU[i]; 356 #endif 357 if (r > 0) { 358 pos++; 359 } else if (r < 0) { 360 neg++; 361 } else zero++; 362 } 363 364 PetscCall(PetscFree(diagU)); 365 if (nneg) *nneg = neg; 366 if (nzero) *nzero = zero; 367 if (npos) *npos = pos; 368 PetscFunctionReturn(PETSC_SUCCESS); 369 } 370 371 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info) 372 { 373 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 374 Mat Aloc; 375 const PetscScalar *av; 376 const PetscInt *ai = NULL, *aj = NULL; 377 PetscInt nz, dummy; 378 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 379 SuperLUStat_t stat; 380 PetscReal *berr = 0; 381 PetscBool ismpiaij, isseqaij, flg; 382 383 PetscFunctionBegin; 384 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 385 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij)); 386 if (ismpiaij) { 387 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc)); 388 } else if (isseqaij) { 389 PetscCall(PetscObjectReference((PetscObject)A)); 390 Aloc = A; 391 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 392 393 PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 394 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed"); 395 PetscCall(MatSeqAIJGetArrayRead(Aloc, &av)); 396 nz = ai[Aloc->rmap->n]; 397 398 /* Allocations for A_sup */ 399 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 400 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 401 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 402 if (lu->FactPattern == SamePattern_SameRowPerm) { 403 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 404 } else if (lu->FactPattern == SamePattern) { 405 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 406 if (lu->use3d) { 407 if (lu->grid3d.zscp.Iam == 0) { 408 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 409 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 410 } else { 411 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 412 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 413 } 414 } else 415 #endif 416 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 417 lu->options.Fact = SamePattern; 418 } else if (lu->FactPattern == DOFACT) { 419 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 420 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 421 lu->options.Fact = DOFACT; 422 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 423 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 424 } 425 426 /* Copy AIJ matrix to superlu_dist matrix */ 427 PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1)); 428 PetscCall(PetscArraycpy(lu->col, aj, nz)); 429 PetscCall(PetscArraycpy(lu->val, av, nz)); 430 PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 431 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed"); 432 PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av)); 433 PetscCall(MatDestroy(&Aloc)); 434 435 /* Create and setup A_sup */ 436 if (lu->options.Fact == DOFACT) { 437 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 438 } 439 440 /* Factor the matrix. */ 441 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 442 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 443 if (lu->use3d) { 444 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 445 } else 446 #endif 447 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 448 if (sinfo > 0) { 449 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo); 450 if (sinfo <= lu->A_sup.ncol) { 451 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 452 PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo)); 453 } else if (sinfo > lu->A_sup.ncol) { 454 /* 455 number of bytes allocated when memory allocation 456 failure occurred, plus A->ncol. 457 */ 458 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 459 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo)); 460 } 461 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 462 463 if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ } 464 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 465 F->assembled = PETSC_TRUE; 466 F->preallocated = PETSC_TRUE; 467 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 468 PetscFunctionReturn(PETSC_SUCCESS); 469 } 470 471 /* Note the Petsc r and c permutations are ignored */ 472 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 473 { 474 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 475 PetscInt M = A->rmap->N, N = A->cmap->N, indx; 476 PetscMPIInt size, mpiflg; 477 PetscBool flg, set; 478 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"}; 479 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"}; 480 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"}; 481 MPI_Comm comm; 482 PetscSuperLU_DIST *context = NULL; 483 484 PetscFunctionBegin; 485 /* Set options to F */ 486 PetscCall(PetscObjectGetComm((PetscObject)F, &comm)); 487 PetscCallMPI(MPI_Comm_size(comm, &size)); 488 489 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat"); 490 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 491 if (set && !flg) lu->options.Equil = NO; 492 493 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg)); 494 if (flg) { 495 switch (indx) { 496 case 0: 497 lu->options.RowPerm = NOROWPERM; 498 break; 499 case 1: 500 lu->options.RowPerm = LargeDiag_MC64; 501 break; 502 case 2: 503 lu->options.RowPerm = LargeDiag_AWPM; 504 break; 505 case 3: 506 lu->options.RowPerm = MY_PERMR; 507 break; 508 default: 509 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation"); 510 } 511 } 512 513 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg)); 514 if (flg) { 515 switch (indx) { 516 case 0: 517 lu->options.ColPerm = NATURAL; 518 break; 519 case 1: 520 lu->options.ColPerm = MMD_AT_PLUS_A; 521 break; 522 case 2: 523 lu->options.ColPerm = MMD_ATA; 524 break; 525 case 3: 526 lu->options.ColPerm = METIS_AT_PLUS_A; 527 break; 528 case 4: 529 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 530 break; 531 default: 532 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 533 } 534 } 535 536 lu->options.ReplaceTinyPivot = NO; 537 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 538 if (set && flg) lu->options.ReplaceTinyPivot = YES; 539 540 lu->options.ParSymbFact = NO; 541 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set)); 542 if (set && flg && size > 1) { 543 #if defined(PETSC_HAVE_PARMETIS) 544 lu->options.ParSymbFact = YES; 545 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 546 #else 547 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS"); 548 #endif 549 } 550 551 lu->FactPattern = SamePattern; 552 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg)); 553 if (flg) { 554 switch (indx) { 555 case 0: 556 lu->FactPattern = SamePattern; 557 break; 558 case 1: 559 lu->FactPattern = SamePattern_SameRowPerm; 560 break; 561 case 2: 562 lu->FactPattern = DOFACT; 563 break; 564 } 565 } 566 567 lu->options.IterRefine = NOREFINE; 568 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set)); 569 if (set) { 570 if (flg) lu->options.IterRefine = SLU_DOUBLE; 571 else lu->options.IterRefine = NOREFINE; 572 } 573 574 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 575 else lu->options.PrintStat = NO; 576 PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL)); 577 PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL)); 578 579 /* Additional options for special cases */ 580 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 581 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, (void *)0)); 582 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 583 } 584 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg)); 585 if (!mpiflg || context->busy) { /* additional options */ 586 if (!mpiflg) { 587 PetscCall(PetscNew(&context)); 588 context->busy = PETSC_TRUE; 589 PetscCallMPI(MPI_Comm_dup(comm, &context->comm)); 590 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context)); 591 } else { 592 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 593 } 594 595 /* Default number of process columns and rows */ 596 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size)); 597 if (!lu->nprow) lu->nprow = 1; 598 while (lu->nprow > 0) { 599 lu->npcol = (int_t)(size / lu->nprow); 600 if (size == lu->nprow * lu->npcol) break; 601 lu->nprow--; 602 } 603 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 604 lu->use3d = PETSC_FALSE; 605 lu->npdep = 1; 606 #endif 607 608 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 609 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL)); 610 PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation"); 611 if (lu->use3d) { 612 PetscInt t; 613 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL)); 614 t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep); 615 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep); 616 if (lu->npdep > 1) { 617 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep))); 618 if (!lu->nprow) lu->nprow = 1; 619 while (lu->nprow > 0) { 620 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow)); 621 if (size == lu->nprow * lu->npcol * lu->npdep) break; 622 lu->nprow--; 623 } 624 } 625 } 626 #endif 627 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL)); 628 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL)); 629 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 630 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep); 631 #else 632 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol); 633 #endif 634 /* end of adding additional options */ 635 636 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 637 if (lu->use3d) { 638 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d)); 639 if (context) { 640 context->grid3d = lu->grid3d; 641 context->use3d = lu->use3d; 642 } 643 } else { 644 #endif 645 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 646 if (context) context->grid = lu->grid; 647 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 648 } 649 #endif 650 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 651 if (mpiflg) { 652 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 653 } else { 654 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 655 } 656 } else { /* (mpiflg && !context->busy) */ 657 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.")); 658 context->busy = PETSC_TRUE; 659 lu->grid = context->grid; 660 } 661 PetscOptionsEnd(); 662 663 /* Initialize ScalePermstruct and LUstruct. */ 664 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct)); 665 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct)); 666 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 667 F->ops->solve = MatSolve_SuperLU_DIST; 668 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 669 F->ops->getinertia = NULL; 670 671 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 672 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 673 PetscFunctionReturn(PETSC_SUCCESS); 674 } 675 676 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) 677 { 678 PetscFunctionBegin; 679 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info)); 680 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 681 PetscFunctionReturn(PETSC_SUCCESS); 682 } 683 684 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) 685 { 686 PetscFunctionBegin; 687 *type = MATSOLVERSUPERLU_DIST; 688 PetscFunctionReturn(PETSC_SUCCESS); 689 } 690 691 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) 692 { 693 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 694 superlu_dist_options_t options; 695 696 PetscFunctionBegin; 697 /* check if matrix is superlu_dist type */ 698 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS); 699 700 options = lu->options; 701 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n")); 702 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 703 * format spec for int64_t is set to %d for whatever reason */ 704 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 705 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 706 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep)); 707 #endif 708 709 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO])); 710 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO])); 711 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE])); 712 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 713 714 switch (options.RowPerm) { 715 case NOROWPERM: 716 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n")); 717 break; 718 case LargeDiag_MC64: 719 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n")); 720 break; 721 case LargeDiag_AWPM: 722 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n")); 723 break; 724 case MY_PERMR: 725 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n")); 726 break; 727 default: 728 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 729 } 730 731 switch (options.ColPerm) { 732 case NATURAL: 733 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n")); 734 break; 735 case MMD_AT_PLUS_A: 736 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n")); 737 break; 738 case MMD_ATA: 739 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n")); 740 break; 741 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 742 case METIS_AT_PLUS_A: 743 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n")); 744 break; 745 case PARMETIS: 746 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n")); 747 break; 748 default: 749 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 750 } 751 752 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO])); 753 754 if (lu->FactPattern == SamePattern) { 755 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n")); 756 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 757 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n")); 758 } else if (lu->FactPattern == DOFACT) { 759 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n")); 760 } else { 761 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern"); 762 } 763 PetscFunctionReturn(PETSC_SUCCESS); 764 } 765 766 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) 767 { 768 PetscBool iascii; 769 PetscViewerFormat format; 770 771 PetscFunctionBegin; 772 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 773 if (iascii) { 774 PetscCall(PetscViewerGetFormat(viewer, &format)); 775 if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer)); 776 } 777 PetscFunctionReturn(PETSC_SUCCESS); 778 } 779 780 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F) 781 { 782 Mat B; 783 Mat_SuperLU_DIST *lu; 784 PetscInt M = A->rmap->N, N = A->cmap->N; 785 PetscMPIInt size; 786 superlu_dist_options_t options; 787 788 PetscFunctionBegin; 789 /* Create the factorization matrix */ 790 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 791 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N)); 792 PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name)); 793 PetscCall(MatSetUp(B)); 794 B->ops->getinfo = MatGetInfo_External; 795 B->ops->view = MatView_SuperLU_DIST; 796 B->ops->destroy = MatDestroy_SuperLU_DIST; 797 798 /* Set the default input options: 799 options.Fact = DOFACT; 800 options.Equil = YES; 801 options.ParSymbFact = NO; 802 options.ColPerm = METIS_AT_PLUS_A; 803 options.RowPerm = LargeDiag_MC64; 804 options.ReplaceTinyPivot = YES; 805 options.IterRefine = DOUBLE; 806 options.Trans = NOTRANS; 807 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 808 options.RefineInitialized = NO; 809 options.PrintStat = YES; 810 options.SymPattern = NO; 811 */ 812 set_default_options_dist(&options); 813 814 B->trivialsymbolic = PETSC_TRUE; 815 if (ftype == MAT_FACTOR_LU) { 816 B->factortype = MAT_FACTOR_LU; 817 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 818 } else { 819 B->factortype = MAT_FACTOR_CHOLESKY; 820 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 821 options.SymPattern = YES; 822 } 823 824 /* set solvertype */ 825 PetscCall(PetscFree(B->solvertype)); 826 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype)); 827 828 PetscCall(PetscNew(&lu)); 829 B->data = lu; 830 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 831 832 lu->options = options; 833 lu->options.Fact = DOFACT; 834 lu->matsolve_iscalled = PETSC_FALSE; 835 lu->matmatsolve_iscalled = PETSC_FALSE; 836 837 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist)); 838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST)); 839 840 *F = B; 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 845 { 846 PetscFunctionBegin; 847 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 848 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 849 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 850 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 851 PetscFunctionReturn(PETSC_SUCCESS); 852 } 853 854 /*MC 855 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 856 857 Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST 858 859 Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver 860 861 Works with `MATAIJ` matrices 862 863 Options Database Keys: 864 + -mat_superlu_dist_r <n> - number of rows in processor partition 865 . -mat_superlu_dist_c <n> - number of columns in processor partition 866 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 867 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided 868 . -mat_superlu_dist_equil - equilibrate the matrix 869 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 870 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 871 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 872 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT` 873 . -mat_superlu_dist_iterrefine - use iterative refinement 874 - -mat_superlu_dist_printstat - print factorization information 875 876 Level: beginner 877 878 Note: 879 If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs. 880 881 .seealso: [](ch_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()` 882 M*/ 883