1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 EXTERN_C_BEGIN 10 #if defined(PETSC_USE_COMPLEX) 11 #define CASTDOUBLECOMPLEX (doublecomplex *) 12 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **) 13 #include <superlu_zdefs.h> 14 #define LUstructInit zLUstructInit 15 #define ScalePermstructInit zScalePermstructInit 16 #define ScalePermstructFree zScalePermstructFree 17 #define LUstructFree zLUstructFree 18 #define Destroy_LU zDestroy_LU 19 #define ScalePermstruct_t zScalePermstruct_t 20 #define LUstruct_t zLUstruct_t 21 #define SOLVEstruct_t zSOLVEstruct_t 22 #define SolveFinalize zSolveFinalize 23 #define pGetDiagU pzGetDiagU 24 #define pgssvx pzgssvx 25 #define allocateA_dist zallocateA_dist 26 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 27 #define SLU SLU_Z 28 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 29 #define DeAllocLlu_3d zDeAllocLlu_3d 30 #define DeAllocGlu_3d zDeAllocGlu_3d 31 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 32 #define pgssvx3d pzgssvx3d 33 #endif 34 #elif defined(PETSC_USE_REAL_SINGLE) 35 #define CASTDOUBLECOMPLEX 36 #define CASTDOUBLECOMPLEXSTAR 37 #include <superlu_sdefs.h> 38 #define LUstructInit sLUstructInit 39 #define ScalePermstructInit sScalePermstructInit 40 #define ScalePermstructFree sScalePermstructFree 41 #define LUstructFree sLUstructFree 42 #define Destroy_LU sDestroy_LU 43 #define ScalePermstruct_t sScalePermstruct_t 44 #define LUstruct_t sLUstruct_t 45 #define SOLVEstruct_t sSOLVEstruct_t 46 #define SolveFinalize sSolveFinalize 47 #define pGetDiagU psGetDiagU 48 #define pgssvx psgssvx 49 #define allocateA_dist sallocateA_dist 50 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 51 #define SLU SLU_S 52 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 53 #define DeAllocLlu_3d sDeAllocLlu_3d 54 #define DeAllocGlu_3d sDeAllocGlu_3d 55 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 56 #define pgssvx3d psgssvx3d 57 #endif 58 #else 59 #define CASTDOUBLECOMPLEX 60 #define CASTDOUBLECOMPLEXSTAR 61 #include <superlu_ddefs.h> 62 #define LUstructInit dLUstructInit 63 #define ScalePermstructInit dScalePermstructInit 64 #define ScalePermstructFree dScalePermstructFree 65 #define LUstructFree dLUstructFree 66 #define Destroy_LU dDestroy_LU 67 #define ScalePermstruct_t dScalePermstruct_t 68 #define LUstruct_t dLUstruct_t 69 #define SOLVEstruct_t dSOLVEstruct_t 70 #define SolveFinalize dSolveFinalize 71 #define pGetDiagU pdGetDiagU 72 #define pgssvx pdgssvx 73 #define allocateA_dist dallocateA_dist 74 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 75 #define SLU SLU_D 76 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 77 #define DeAllocLlu_3d dDeAllocLlu_3d 78 #define DeAllocGlu_3d dDeAllocGlu_3d 79 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 80 #define pgssvx3d pdgssvx3d 81 #endif 82 #endif 83 EXTERN_C_END 84 85 typedef struct { 86 int_t nprow, npcol, *row, *col; 87 gridinfo_t grid; 88 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 89 PetscBool use3d; 90 int_t npdep; /* replication factor, must be power of two */ 91 gridinfo3d_t grid3d; 92 #endif 93 superlu_dist_options_t options; 94 SuperMatrix A_sup; 95 ScalePermstruct_t ScalePermstruct; 96 LUstruct_t LUstruct; 97 int StatPrint; 98 SOLVEstruct_t SOLVEstruct; 99 fact_t FactPattern; 100 MPI_Comm comm_superlu; 101 PetscScalar *val; 102 PetscBool matsolve_iscalled, matmatsolve_iscalled; 103 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 104 } Mat_SuperLU_DIST; 105 106 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU) 107 { 108 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 109 110 PetscFunctionBegin; 111 PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU)); 112 PetscFunctionReturn(PETSC_SUCCESS); 113 } 114 115 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU) 116 { 117 PetscFunctionBegin; 118 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 119 PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 124 typedef struct { 125 MPI_Comm comm; 126 PetscBool busy; 127 gridinfo_t grid; 128 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 129 PetscBool use3d; 130 gridinfo3d_t grid3d; 131 #endif 132 } PetscSuperLU_DIST; 133 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 134 135 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state) 136 { 137 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val; 138 139 PetscFunctionBegin; 140 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval"); 141 PetscCall(PetscInfo(NULL, "Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n")); 142 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 143 if (context->use3d) { 144 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d)); 145 } else 146 #endif 147 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid)); 148 PetscCallMPI(MPI_Comm_free(&context->comm)); 149 PetscCall(PetscFree(context)); 150 PetscFunctionReturn(MPI_SUCCESS); 151 } 152 153 /* 154 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 155 users who do not destroy all PETSc objects before PetscFinalize(). 156 157 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn() 158 can still check that the keyval associated with the MPI communicator is correct when the MPI 159 communicator is destroyed. 160 161 This is called in PetscFinalize() 162 */ 163 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 164 { 165 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 166 167 PetscFunctionBegin; 168 PetscCall(PetscInfo(NULL, "Freeing Petsc_Superlu_dist_keyval\n")); 169 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 170 PetscFunctionReturn(PETSC_SUCCESS); 171 } 172 173 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 174 { 175 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 176 177 PetscFunctionBegin; 178 if (lu->CleanUpSuperLU_Dist) { 179 /* Deallocate SuperLU_DIST storage */ 180 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 181 if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 182 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 183 if (lu->use3d) { 184 if (lu->grid3d.zscp.Iam == 0) { 185 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 186 } else { 187 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 188 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 189 } 190 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 191 } else 192 #endif 193 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 194 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 195 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 196 197 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 198 if (lu->comm_superlu) { 199 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 200 if (lu->use3d) { 201 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d)); 202 } else 203 #endif 204 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid)); 205 } 206 } 207 /* 208 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 209 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 210 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 211 * is off, and the communicator was not released or marked as "not busy " in the old code. 212 * Here we try to release comm regardless. 213 */ 214 if (lu->comm_superlu) { 215 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 216 } else { 217 PetscSuperLU_DIST *context; 218 MPI_Comm comm; 219 PetscMPIInt flg; 220 221 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 222 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg)); 223 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Communicator does not have expected Petsc_Superlu_dist_keyval attribute"); 224 context->busy = PETSC_FALSE; 225 } 226 227 PetscCall(PetscFree(A->data)); 228 /* clear composed functions */ 229 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 230 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL)); 231 232 PetscFunctionReturn(PETSC_SUCCESS); 233 } 234 235 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x) 236 { 237 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 238 PetscInt m = A->rmap->n; 239 SuperLUStat_t stat; 240 PetscReal berr[1]; 241 PetscScalar *bptr = NULL; 242 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 243 static PetscBool cite = PETSC_FALSE; 244 245 PetscFunctionBegin; 246 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 247 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM " 248 "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n", 249 &cite)); 250 251 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 252 /* see comments in MatMatSolve() */ 253 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 254 lu->options.SolveInitialized = NO; 255 } 256 PetscCall(VecCopy(b_mpi, x)); 257 PetscCall(VecGetArray(x, &bptr)); 258 259 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 260 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 261 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 262 else 263 #endif 264 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 265 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 266 267 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 268 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 269 270 PetscCall(VecRestoreArray(x, &bptr)); 271 lu->matsolve_iscalled = PETSC_TRUE; 272 lu->matmatsolve_iscalled = PETSC_FALSE; 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X) 277 { 278 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 279 PetscInt m = A->rmap->n, nrhs; 280 SuperLUStat_t stat; 281 PetscReal berr[1]; 282 PetscScalar *bptr; 283 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 284 PetscBool flg; 285 286 PetscFunctionBegin; 287 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 288 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 289 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 290 if (X != B_mpi) { 291 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 292 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 293 } 294 295 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 296 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 297 thus destroy it and create a new SOLVEstruct. 298 Otherwise it may result in memory corruption or incorrect solution 299 See src/mat/tests/ex125.c */ 300 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 301 lu->options.SolveInitialized = NO; 302 } 303 if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN)); 304 305 PetscCall(MatGetSize(B_mpi, NULL, &nrhs)); 306 307 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 308 PetscCall(MatDenseGetArray(X, &bptr)); 309 310 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 311 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 312 else 313 #endif 314 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 315 316 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 317 PetscCall(MatDenseRestoreArray(X, &bptr)); 318 319 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 320 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 321 lu->matsolve_iscalled = PETSC_FALSE; 322 lu->matmatsolve_iscalled = PETSC_TRUE; 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 /* 327 input: 328 F: numeric Cholesky factor 329 output: 330 nneg: total number of negative pivots 331 nzero: total number of zero pivots 332 npos: (global dimension of F) - nneg - nzero 333 */ 334 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 335 { 336 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 337 PetscScalar *diagU = NULL; 338 PetscInt M, i, neg = 0, zero = 0, pos = 0; 339 PetscReal r; 340 341 PetscFunctionBegin; 342 PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled"); 343 PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM"); 344 PetscCall(MatGetSize(F, &M, NULL)); 345 PetscCall(PetscMalloc1(M, &diagU)); 346 PetscCall(MatSuperluDistGetDiagU(F, diagU)); 347 for (i = 0; i < M; i++) { 348 #if defined(PETSC_USE_COMPLEX) 349 r = PetscImaginaryPart(diagU[i]) / 10.0; 350 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0)); 351 r = PetscRealPart(diagU[i]); 352 #else 353 r = diagU[i]; 354 #endif 355 if (r > 0) { 356 pos++; 357 } else if (r < 0) { 358 neg++; 359 } else zero++; 360 } 361 362 PetscCall(PetscFree(diagU)); 363 if (nneg) *nneg = neg; 364 if (nzero) *nzero = zero; 365 if (npos) *npos = pos; 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368 369 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info) 370 { 371 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 372 Mat Aloc; 373 const PetscScalar *av; 374 const PetscInt *ai = NULL, *aj = NULL; 375 PetscInt nz, dummy; 376 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 377 SuperLUStat_t stat; 378 PetscReal *berr = 0; 379 PetscBool ismpiaij, isseqaij, flg; 380 381 PetscFunctionBegin; 382 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 383 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij)); 384 if (ismpiaij) { 385 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc)); 386 } else if (isseqaij) { 387 PetscCall(PetscObjectReference((PetscObject)A)); 388 Aloc = A; 389 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 390 391 PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 392 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed"); 393 PetscCall(MatSeqAIJGetArrayRead(Aloc, &av)); 394 nz = ai[Aloc->rmap->n]; 395 396 /* Allocations for A_sup */ 397 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 398 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 399 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 400 if (lu->FactPattern == SamePattern_SameRowPerm) { 401 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 402 } else if (lu->FactPattern == SamePattern) { 403 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 404 if (lu->use3d) { 405 if (lu->grid3d.zscp.Iam == 0) { 406 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 407 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 408 } else { 409 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 410 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 411 } 412 } else 413 #endif 414 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 415 lu->options.Fact = SamePattern; 416 } else if (lu->FactPattern == DOFACT) { 417 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 418 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 419 lu->options.Fact = DOFACT; 420 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 421 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 422 } 423 424 /* Copy AIJ matrix to superlu_dist matrix */ 425 PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1)); 426 PetscCall(PetscArraycpy(lu->col, aj, nz)); 427 PetscCall(PetscArraycpy(lu->val, av, nz)); 428 PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 429 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed"); 430 PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av)); 431 PetscCall(MatDestroy(&Aloc)); 432 433 /* Create and setup A_sup */ 434 if (lu->options.Fact == DOFACT) { 435 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 436 } 437 438 /* Factor the matrix. */ 439 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 440 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 441 if (lu->use3d) { 442 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 443 } else 444 #endif 445 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 446 if (sinfo > 0) { 447 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo); 448 if (sinfo <= lu->A_sup.ncol) { 449 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 450 PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo)); 451 } else if (sinfo > lu->A_sup.ncol) { 452 /* 453 number of bytes allocated when memory allocation 454 failure occurred, plus A->ncol. 455 */ 456 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 457 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo)); 458 } 459 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 460 461 if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ } 462 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 463 F->assembled = PETSC_TRUE; 464 F->preallocated = PETSC_TRUE; 465 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 466 PetscFunctionReturn(PETSC_SUCCESS); 467 } 468 469 /* Note the Petsc r and c permutations are ignored */ 470 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 471 { 472 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 473 PetscInt M = A->rmap->N, N = A->cmap->N, indx; 474 PetscMPIInt size, mpiflg; 475 PetscBool flg, set; 476 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"}; 477 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"}; 478 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"}; 479 MPI_Comm comm; 480 PetscSuperLU_DIST *context = NULL; 481 482 PetscFunctionBegin; 483 /* Set options to F */ 484 PetscCall(PetscObjectGetComm((PetscObject)F, &comm)); 485 PetscCallMPI(MPI_Comm_size(comm, &size)); 486 487 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat"); 488 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 489 if (set && !flg) lu->options.Equil = NO; 490 491 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg)); 492 if (flg) { 493 switch (indx) { 494 case 0: 495 lu->options.RowPerm = NOROWPERM; 496 break; 497 case 1: 498 lu->options.RowPerm = LargeDiag_MC64; 499 break; 500 case 2: 501 lu->options.RowPerm = LargeDiag_AWPM; 502 break; 503 case 3: 504 lu->options.RowPerm = MY_PERMR; 505 break; 506 default: 507 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation"); 508 } 509 } 510 511 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg)); 512 if (flg) { 513 switch (indx) { 514 case 0: 515 lu->options.ColPerm = NATURAL; 516 break; 517 case 1: 518 lu->options.ColPerm = MMD_AT_PLUS_A; 519 break; 520 case 2: 521 lu->options.ColPerm = MMD_ATA; 522 break; 523 case 3: 524 lu->options.ColPerm = METIS_AT_PLUS_A; 525 break; 526 case 4: 527 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 528 break; 529 default: 530 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 531 } 532 } 533 534 lu->options.ReplaceTinyPivot = NO; 535 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 536 if (set && flg) lu->options.ReplaceTinyPivot = YES; 537 538 lu->options.ParSymbFact = NO; 539 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set)); 540 if (set && flg && size > 1) { 541 #if defined(PETSC_HAVE_PARMETIS) 542 lu->options.ParSymbFact = YES; 543 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 544 #else 545 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS"); 546 #endif 547 } 548 549 lu->FactPattern = SamePattern; 550 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg)); 551 if (flg) { 552 switch (indx) { 553 case 0: 554 lu->FactPattern = SamePattern; 555 break; 556 case 1: 557 lu->FactPattern = SamePattern_SameRowPerm; 558 break; 559 case 2: 560 lu->FactPattern = DOFACT; 561 break; 562 } 563 } 564 565 lu->options.IterRefine = NOREFINE; 566 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set)); 567 if (set) { 568 if (flg) lu->options.IterRefine = SLU_DOUBLE; 569 else lu->options.IterRefine = NOREFINE; 570 } 571 572 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 573 else lu->options.PrintStat = NO; 574 PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL)); 575 PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL)); 576 577 /* Additional options for special cases */ 578 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 579 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, (void *)0)); 580 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 581 } 582 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg)); 583 if (!mpiflg || context->busy) { /* additional options */ 584 if (!mpiflg) { 585 PetscCall(PetscNew(&context)); 586 context->busy = PETSC_TRUE; 587 PetscCallMPI(MPI_Comm_dup(comm, &context->comm)); 588 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context)); 589 } else { 590 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 591 } 592 593 /* Default number of process columns and rows */ 594 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size)); 595 if (!lu->nprow) lu->nprow = 1; 596 while (lu->nprow > 0) { 597 lu->npcol = (int_t)(size / lu->nprow); 598 if (size == lu->nprow * lu->npcol) break; 599 lu->nprow--; 600 } 601 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 602 lu->use3d = PETSC_FALSE; 603 lu->npdep = 1; 604 #endif 605 606 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 607 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL)); 608 PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation"); 609 if (lu->use3d) { 610 PetscInt t; 611 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL)); 612 t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep); 613 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep); 614 if (lu->npdep > 1) { 615 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep))); 616 if (!lu->nprow) lu->nprow = 1; 617 while (lu->nprow > 0) { 618 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow)); 619 if (size == lu->nprow * lu->npcol * lu->npdep) break; 620 lu->nprow--; 621 } 622 } 623 } 624 #endif 625 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL)); 626 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL)); 627 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 628 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep); 629 #else 630 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol); 631 #endif 632 /* end of adding additional options */ 633 634 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 635 if (lu->use3d) { 636 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d)); 637 if (context) { 638 context->grid3d = lu->grid3d; 639 context->use3d = lu->use3d; 640 } 641 } else { 642 #endif 643 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 644 if (context) context->grid = lu->grid; 645 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 646 } 647 #endif 648 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 649 if (mpiflg) { 650 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 651 } else { 652 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 653 } 654 } else { /* (mpiflg && !context->busy) */ 655 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.")); 656 context->busy = PETSC_TRUE; 657 lu->grid = context->grid; 658 } 659 PetscOptionsEnd(); 660 661 /* Initialize ScalePermstruct and LUstruct. */ 662 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct)); 663 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct)); 664 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 665 F->ops->solve = MatSolve_SuperLU_DIST; 666 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 667 F->ops->getinertia = NULL; 668 669 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 670 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 671 PetscFunctionReturn(PETSC_SUCCESS); 672 } 673 674 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) 675 { 676 PetscFunctionBegin; 677 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info)); 678 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 679 PetscFunctionReturn(PETSC_SUCCESS); 680 } 681 682 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) 683 { 684 PetscFunctionBegin; 685 *type = MATSOLVERSUPERLU_DIST; 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) 690 { 691 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 692 superlu_dist_options_t options; 693 694 PetscFunctionBegin; 695 /* check if matrix is superlu_dist type */ 696 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS); 697 698 options = lu->options; 699 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n")); 700 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 701 * format spec for int64_t is set to %d for whatever reason */ 702 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 703 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 704 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep)); 705 #endif 706 707 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO])); 708 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO])); 709 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE])); 710 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 711 712 switch (options.RowPerm) { 713 case NOROWPERM: 714 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n")); 715 break; 716 case LargeDiag_MC64: 717 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n")); 718 break; 719 case LargeDiag_AWPM: 720 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n")); 721 break; 722 case MY_PERMR: 723 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n")); 724 break; 725 default: 726 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 727 } 728 729 switch (options.ColPerm) { 730 case NATURAL: 731 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n")); 732 break; 733 case MMD_AT_PLUS_A: 734 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n")); 735 break; 736 case MMD_ATA: 737 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n")); 738 break; 739 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 740 case METIS_AT_PLUS_A: 741 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n")); 742 break; 743 case PARMETIS: 744 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n")); 745 break; 746 default: 747 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 748 } 749 750 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO])); 751 752 if (lu->FactPattern == SamePattern) { 753 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n")); 754 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 755 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n")); 756 } else if (lu->FactPattern == DOFACT) { 757 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n")); 758 } else { 759 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern"); 760 } 761 PetscFunctionReturn(PETSC_SUCCESS); 762 } 763 764 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) 765 { 766 PetscBool iascii; 767 PetscViewerFormat format; 768 769 PetscFunctionBegin; 770 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 771 if (iascii) { 772 PetscCall(PetscViewerGetFormat(viewer, &format)); 773 if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer)); 774 } 775 PetscFunctionReturn(PETSC_SUCCESS); 776 } 777 778 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F) 779 { 780 Mat B; 781 Mat_SuperLU_DIST *lu; 782 PetscInt M = A->rmap->N, N = A->cmap->N; 783 PetscMPIInt size; 784 superlu_dist_options_t options; 785 786 PetscFunctionBegin; 787 /* Create the factorization matrix */ 788 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 789 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N)); 790 PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name)); 791 PetscCall(MatSetUp(B)); 792 B->ops->getinfo = MatGetInfo_External; 793 B->ops->view = MatView_SuperLU_DIST; 794 B->ops->destroy = MatDestroy_SuperLU_DIST; 795 796 /* Set the default input options: 797 options.Fact = DOFACT; 798 options.Equil = YES; 799 options.ParSymbFact = NO; 800 options.ColPerm = METIS_AT_PLUS_A; 801 options.RowPerm = LargeDiag_MC64; 802 options.ReplaceTinyPivot = YES; 803 options.IterRefine = DOUBLE; 804 options.Trans = NOTRANS; 805 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 806 options.RefineInitialized = NO; 807 options.PrintStat = YES; 808 options.SymPattern = NO; 809 */ 810 set_default_options_dist(&options); 811 812 B->trivialsymbolic = PETSC_TRUE; 813 if (ftype == MAT_FACTOR_LU) { 814 B->factortype = MAT_FACTOR_LU; 815 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 816 } else { 817 B->factortype = MAT_FACTOR_CHOLESKY; 818 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 819 options.SymPattern = YES; 820 } 821 822 /* set solvertype */ 823 PetscCall(PetscFree(B->solvertype)); 824 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype)); 825 826 PetscCall(PetscNew(&lu)); 827 B->data = lu; 828 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 829 830 lu->options = options; 831 lu->options.Fact = DOFACT; 832 lu->matsolve_iscalled = PETSC_FALSE; 833 lu->matmatsolve_iscalled = PETSC_FALSE; 834 835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist)); 836 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST)); 837 838 *F = B; 839 PetscFunctionReturn(PETSC_SUCCESS); 840 } 841 842 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 843 { 844 PetscFunctionBegin; 845 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 846 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 847 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 848 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 849 PetscFunctionReturn(PETSC_SUCCESS); 850 } 851 852 /*MC 853 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 854 855 Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST 856 857 Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver 858 859 Works with `MATAIJ` matrices 860 861 Options Database Keys: 862 + -mat_superlu_dist_r <n> - number of rows in processor partition 863 . -mat_superlu_dist_c <n> - number of columns in processor partition 864 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 865 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided 866 . -mat_superlu_dist_equil - equilibrate the matrix 867 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 868 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 869 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 870 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT` 871 . -mat_superlu_dist_iterrefine - use iterative refinement 872 - -mat_superlu_dist_printstat - print factorization information 873 874 Level: beginner 875 876 Note: 877 If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs. 878 879 .seealso: [](chapter_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()` 880 M*/ 881