/*
  Provides an interface to the SuperLU_DIST sparse solver
*/

#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>
#include <petscpkg_version.h>

/* The SuperLU_DIST headers test possibly-undefined macros with #if; silence -Wundef while including them */
PETSC_PRAGMA_DIAGNOSTIC_IGNORED_BEGIN("-Wundef")
EXTERN_C_BEGIN
/*
  Map the precision-neutral names used in this file onto the z/s/d-prefixed SuperLU_DIST API
  that matches the scalar type PETSc was configured with (complex double / real single / real double)
*/
#if defined(PETSC_USE_COMPLEX)
#define CASTDOUBLECOMPLEX             (doublecomplex *)
#define CASTDOUBLECOMPLEXSTAR         (doublecomplex **)
#include <superlu_zdefs.h>
#define LUstructInit                  zLUstructInit
#define ScalePermstructInit           zScalePermstructInit
#define ScalePermstructFree           zScalePermstructFree
#define LUstructFree                  zLUstructFree
#define Destroy_LU                    zDestroy_LU
#define ScalePermstruct_t             zScalePermstruct_t
#define LUstruct_t                    zLUstruct_t
#define SOLVEstruct_t                 zSOLVEstruct_t
#define SolveFinalize                 zSolveFinalize
#define pGetDiagU                     pzGetDiagU
#define pgssvx                        pzgssvx
#define allocateA_dist                zallocateA_dist
#define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist
#define SLU                           SLU_Z
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
#define DeAllocLlu_3d              zDeAllocLlu_3d
#define DeAllocGlu_3d              zDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d
#define pgssvx3d                   pzgssvx3d
#endif
#elif defined(PETSC_USE_REAL_SINGLE)
#define CASTDOUBLECOMPLEX
#define CASTDOUBLECOMPLEXSTAR
#include <superlu_sdefs.h>
#define LUstructInit                  sLUstructInit
#define ScalePermstructInit           sScalePermstructInit
#define ScalePermstructFree           sScalePermstructFree
#define LUstructFree                  sLUstructFree
#define Destroy_LU                    sDestroy_LU
#define ScalePermstruct_t             sScalePermstruct_t
#define LUstruct_t                    sLUstruct_t
#define SOLVEstruct_t                 sSOLVEstruct_t
#define SolveFinalize                 sSolveFinalize
#define pGetDiagU                     psGetDiagU
#define pgssvx                        psgssvx
#define allocateA_dist                sallocateA_dist
#define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist
#define SLU                           SLU_S
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
#define DeAllocLlu_3d              sDeAllocLlu_3d
#define DeAllocGlu_3d              sDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d
#define pgssvx3d                   psgssvx3d
#endif
#else
#define CASTDOUBLECOMPLEX
#define CASTDOUBLECOMPLEXSTAR
#include <superlu_ddefs.h>
#define LUstructInit                  dLUstructInit
#define ScalePermstructInit           dScalePermstructInit
#define ScalePermstructFree           dScalePermstructFree
#define LUstructFree                  dLUstructFree
#define Destroy_LU                    dDestroy_LU
#define ScalePermstruct_t             dScalePermstruct_t
#define LUstruct_t                    dLUstruct_t
#define SOLVEstruct_t                 dSOLVEstruct_t
#define SolveFinalize                 dSolveFinalize
#define pGetDiagU                     pdGetDiagU
#define pgssvx                        pdgssvx
#define allocateA_dist                dallocateA_dist
#define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist
#define SLU                           SLU_D
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
#define DeAllocLlu_3d              dDeAllocLlu_3d
#define DeAllocGlu_3d              dDeAllocGlu_3d
#define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d
#define pgssvx3d                   pdgssvx3d
#endif
#endif
EXTERN_C_END
PETSC_PRAGMA_DIAGNOSTIC_IGNORED_END()

/* Per-factorization state for a MATSOLVERSUPERLU_DIST factor matrix; stored in Mat->data */
typedef struct {
  int_t      nprow, npcol, *row, *col; /* process-grid shape and copies of the CSR row/col index arrays */
  gridinfo_t grid;                     /* SuperLU_DIST 2D process grid */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool    use3d;
  int_t        npdep; /* replication factor, must be power of two */
  gridinfo3d_t grid3d;
#endif
  superlu_dist_options_t options;         /* SuperLU_DIST option set, configured from PETSc options */
  SuperMatrix            A_sup;           /* matrix in SuperLU_DIST distributed CSR (SLU_NR_loc) format */
  ScalePermstruct_t      ScalePermstruct; /* row/column scalings and permutations */
  LUstruct_t             LUstruct;        /* distributed L and U factors */
  int                    StatPrint;
  SOLVEstruct_t          SOLVEstruct;     /* communication pattern used by the triangular solves */
  fact_t                 FactPattern;     /* reuse policy for repeated factorizations */
  MPI_Comm               comm_superlu;    /* private communicator when the shared keyval context is unavailable/busy */
  PetscScalar           *val;             /* copy of the matrix numerical values */
  PetscBool              matsolve_iscalled, matmatsolve_iscalled;
  PetscBool              CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */
} Mat_SuperLU_DIST;

/* Type-specific implementation: copies the diagonal of the U factor into the caller-supplied array diagU (length F->rmap->N) */
PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data;

  PetscFunctionBegin;
  PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Public entry point: dispatches to the composed method, so it is a no-op for non-SuperLU_DIST factors */
PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU)
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(F, MAT_CLASSID, 1);
  PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */
typedef struct {
  MPI_Comm   comm; /* duplicated communicator owned by this context */
  PetscBool  busy; /* true while some factor matrix is using this grid */
  gridinfo_t grid;
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  PetscBool    use3d;
  gridinfo3d_t grid3d;
#endif
} PetscSuperLU_DIST;

/* MPI attribute key under which a PetscSuperLU_DIST context is cached on the matrix communicator */
static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID;

/* MPI delete callback: tears down the cached SuperLU_DIST grid and frees the duplicated
   communicator when the outer communicator is destroyed */
PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
{
  PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val;

  PetscFunctionBegin;
  if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval");
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
  if (context->use3d) {
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d));
  } else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid));
  PetscCallMPI(MPI_Comm_free(&context->comm));
  PetscCall(PetscFree(context));
  PetscFunctionReturn(MPI_SUCCESS);
}

/*
  Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for
  users who do not destroy all PETSc objects before PetscFinalize().
  The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn()
  can still check that the keyval associated with the MPI communicator is correct when the MPI
  communicator is destroyed.

  This is called in PetscFinalize()
*/
static PetscErrorCode Petsc_Superlu_dist_keyval_free(void)
{
  /* free a local copy so the global Petsc_Superlu_dist_keyval is NOT reset to MPI_KEYVAL_INVALID */
  PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroys a SuperLU_DIST factor matrix: frees the SuperLU_DIST objects (if a factorization
   was performed), releases or un-marks the communicator/grid, and frees the Mat_SuperLU_DIST */
static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;

  PetscFunctionBegin;
  if (lu->CleanUpSuperLU_Dist) {
    /* Deallocate SuperLU_DIST storage */
    PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
    if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
    if (lu->use3d) {
      /* with the 3D algorithm only the zscp.Iam == 0 layer holds a true LU factor; the other layers hold replicated structures */
      if (lu->grid3d.zscp.Iam == 0) {
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
      } else {
        PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
        PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
      }
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d));
    } else
#endif
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct));
    PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct));

    /*
      Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */
    if (lu->comm_superlu) {
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d));
      } else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid));
    }
  }
  /*
   * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist.
   * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use
   * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist
   * is off, and the communicator was not released or marked as "not busy " in the old code.
   * Here we try to release comm regardless.
   */
  if (lu->comm_superlu) {
    PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu));
  } else {
    /* this matrix used the shared keyval context; mark that grid as available again */
    PetscSuperLU_DIST *context;
    MPI_Comm           comm;
    PetscMPIInt        flg;

    PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
    PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg));
    if (flg) context->busy = PETSC_FALSE;
  }

  PetscCall(PetscFree(A->data));
  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solves A x = b using a previously computed SuperLU_DIST factorization.
   The right-hand side is copied into x and pgssvx() overwrites it with the solution in place. */
static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt          m  = A->rmap->n;
  SuperLUStat_t     stat;
  PetscReal         berr[1];
  PetscScalar      *bptr = NULL;
  int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  static PetscBool  cite = PETSC_FALSE;
  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM "
                                   "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n",
                                   &cite));

  if (lu->options.SolveInitialized && !lu->matsolve_iscalled) {
    /* see comments in MatMatSolve() */
    PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  /* pgssvx() solves in place, so stage the right-hand side in the output vector */
  PetscCall(VecCopy(b_mpi, x));
  PetscCall(VecGetArray(x, &bptr));

  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));

  PetscCall(VecRestoreArray(x, &bptr));
  lu->matsolve_iscalled    = PETSC_TRUE;
  lu->matmatsolve_iscalled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Solves A X = B for multiple right-hand sides; B and X must be MATDENSE.
   Like MatSolve, the solve happens in place on X's array. */
static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X)
{
  Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data;
  PetscInt          m  = A->rmap->n, nrhs;
  SuperLUStat_t     stat;
  PetscReal         berr[1];
  PetscScalar      *bptr;
  int               info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */
  PetscBool         flg;

  PetscFunctionBegin;
  PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED");
  PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix");
  if (X != B_mpi) {
    PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL));
    PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");
  }

  if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) {
    /* communication pattern of SOLVEstruct is unlikely created for matmatsolve,
       thus destroy it and create a new SOLVEstruct.
       Otherwise it may result in memory corruption or incorrect solution
       See src/mat/tests/ex125.c */
    PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
    lu->options.SolveInitialized = NO;
  }
  if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN));

  PetscCall(MatGetSize(B_mpi, NULL, &nrhs));

  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables.
*/
  PetscCall(MatDenseGetArray(X, &bptr));

#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));
  else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info));

  PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info);
  PetscCall(MatDenseRestoreArray(X, &bptr));

  if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat));
  lu->matsolve_iscalled    = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_TRUE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  input:
   F:        numeric Cholesky factor
  output:
   nneg:     total number of negative pivots
   nzero:    total number of zero pivots
   npos:     (global dimension of F) - nneg - nzero
*/
static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_SuperLU_DIST *lu    = (Mat_SuperLU_DIST *)F->data;
  PetscScalar      *diagU = NULL;
  PetscInt          M, i, neg = 0, zero = 0, pos = 0;
  PetscReal         r;

  PetscFunctionBegin;
  PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled");
  /* the inertia of U only matches the inertia of A when rows were not permuted */
  PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM");
  PetscCall(MatGetSize(F, &M, NULL));
  PetscCall(PetscMalloc1(M, &diagU));
  PetscCall(MatSuperluDistGetDiagU(F, diagU));
  for (i = 0; i < M; i++) {
#if defined(PETSC_USE_COMPLEX)
    /* a Hermitian factor must have a (numerically) real diagonal; the /10 scaling loosens the tolerance check below */
    r = PetscImaginaryPart(diagU[i]) / 10.0;
    PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0));
    r = PetscRealPart(diagU[i]);
#else
    r = diagU[i];
#endif
    if (r > 0) {
      pos++;
    } else if (r < 0) {
      neg++;
    } else zero++;
  }

  PetscCall(PetscFree(diagU));
  if (nneg) *nneg = neg;
  if (nzero) *nzero = zero;
  if (npos) *npos = pos;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric LU factorization: copies the AIJ matrix into SuperLU_DIST's distributed CSR format
   (reusing storage/pattern according to lu->FactPattern on refactorizations) and calls pgssvx()
   with nrhs = 0 so only the factorization is performed. */
static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info)
{
  Mat_SuperLU_DIST  *lu = (Mat_SuperLU_DIST *)F->data;
  Mat                Aloc;
  const PetscScalar *av;
  const PetscInt    *ai = NULL, *aj = NULL;
  PetscInt           nz, dummy;
  int                sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */
  SuperLUStat_t      stat;
  PetscReal         *berr = 0;
  PetscBool          ismpiaij, isseqaij, flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij));
  if (ismpiaij) {
    /* merge diagonal and off-diagonal parts into one sequential local matrix */
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc));
  } else if (isseqaij) {
    PetscCall(PetscObjectReference((PetscObject)A));
    Aloc = A;
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);

  PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed");
  PetscCall(MatSeqAIJGetArrayRead(Aloc, &av));
  nz = ai[Aloc->rmap->n];

  /* Allocations for A_sup */
  if (lu->options.Fact == DOFACT) { /* first numeric factorization */
    PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
  } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */
    if (lu->FactPattern == SamePattern_SameRowPerm) {
      lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */
    } else if (lu->FactPattern == SamePattern) {
      /* pattern reused but row permutation may change: the old LU factor must be destroyed first */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0)
      if (lu->use3d) {
        if (lu->grid3d.zscp.Iam == 0) {
          PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct));
          PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct));
        } else {
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d));
          PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct));
        }
      } else
#endif
        PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = SamePattern;
    } else if (lu->FactPattern == DOFACT) {
      /* full restart: throw away matrix and factor and reallocate */
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup));
      PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct));
      lu->options.Fact = DOFACT;
      PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row));
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT");
  }

  /* Copy AIJ matrix to superlu_dist matrix */
  PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1));
  PetscCall(PetscArraycpy(lu->col, aj, nz));
  PetscCall(PetscArraycpy(lu->val, av, nz));
  PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg));
  PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed");
  PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av));
  PetscCall(MatDestroy(&Aloc));

  /* Create and setup A_sup */
  if (lu->options.Fact == DOFACT) {
    PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE));
  }

  /* Factor the matrix. */
  PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */
#if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE)
  if (lu->use3d) {
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  } else
#endif
    PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo));
  if (sinfo > 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo);
    if (sinfo <= lu->A_sup.ncol) {
      F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
      PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo));
    } else if (sinfo > lu->A_sup.ncol) {
      /*
       number of bytes allocated when memory allocation
       failure occurred, plus A->ncol.
455 */ 456 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 457 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo)); 458 } 459 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 460 461 if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ } 462 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 463 F->assembled = PETSC_TRUE; 464 F->preallocated = PETSC_TRUE; 465 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 466 PetscFunctionReturn(PETSC_SUCCESS); 467 } 468 469 /* Note the Petsc r and c permutations are ignored */ 470 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 471 { 472 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 473 PetscInt M = A->rmap->N, N = A->cmap->N, indx; 474 PetscMPIInt size, mpiflg; 475 PetscBool flg, set; 476 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"}; 477 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"}; 478 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"}; 479 MPI_Comm comm; 480 PetscSuperLU_DIST *context = NULL; 481 482 PetscFunctionBegin; 483 /* Set options to F */ 484 PetscCall(PetscObjectGetComm((PetscObject)F, &comm)); 485 PetscCallMPI(MPI_Comm_size(comm, &size)); 486 487 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat"); 488 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? 
PETSC_TRUE : PETSC_FALSE, &flg, &set)); 489 if (set && !flg) lu->options.Equil = NO; 490 491 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg)); 492 if (flg) { 493 switch (indx) { 494 case 0: 495 lu->options.RowPerm = NOROWPERM; 496 break; 497 case 1: 498 lu->options.RowPerm = LargeDiag_MC64; 499 break; 500 case 2: 501 lu->options.RowPerm = LargeDiag_AWPM; 502 break; 503 case 3: 504 lu->options.RowPerm = MY_PERMR; 505 break; 506 default: 507 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation"); 508 } 509 } 510 511 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg)); 512 if (flg) { 513 switch (indx) { 514 case 0: 515 lu->options.ColPerm = NATURAL; 516 break; 517 case 1: 518 lu->options.ColPerm = MMD_AT_PLUS_A; 519 break; 520 case 2: 521 lu->options.ColPerm = MMD_ATA; 522 break; 523 case 3: 524 lu->options.ColPerm = METIS_AT_PLUS_A; 525 break; 526 case 4: 527 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 528 break; 529 default: 530 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 531 } 532 } 533 534 lu->options.ReplaceTinyPivot = NO; 535 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? 
PETSC_TRUE : PETSC_FALSE, &flg, &set)); 536 if (set && flg) lu->options.ReplaceTinyPivot = YES; 537 538 lu->options.ParSymbFact = NO; 539 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set)); 540 if (set && flg && size > 1) { 541 #if defined(PETSC_HAVE_PARMETIS) 542 lu->options.ParSymbFact = YES; 543 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 544 #else 545 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS"); 546 #endif 547 } 548 549 lu->FactPattern = SamePattern; 550 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg)); 551 if (flg) { 552 switch (indx) { 553 case 0: 554 lu->FactPattern = SamePattern; 555 break; 556 case 1: 557 lu->FactPattern = SamePattern_SameRowPerm; 558 break; 559 case 2: 560 lu->FactPattern = DOFACT; 561 break; 562 } 563 } 564 565 lu->options.IterRefine = NOREFINE; 566 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? 
PETSC_FALSE : PETSC_TRUE, &flg, &set)); 567 if (set) { 568 if (flg) lu->options.IterRefine = SLU_DOUBLE; 569 else lu->options.IterRefine = NOREFINE; 570 } 571 572 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 573 else lu->options.PrintStat = NO; 574 PetscCall(PetscOptionsDeprecated("-mat_superlu_dist_statprint", "-mat_superlu_dist_printstat", "3.19", NULL)); 575 PetscCall(PetscOptionsBool("-mat_superlu_dist_printstat", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL)); 576 577 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg)); 578 if (!mpiflg || context->busy) { /* additional options */ 579 if (!mpiflg) { 580 PetscCall(PetscNew(&context)); 581 context->busy = PETSC_TRUE; 582 PetscCallMPI(MPI_Comm_dup(comm, &context->comm)); 583 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context)); 584 } else { 585 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 586 } 587 588 /* Default number of process columns and rows */ 589 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size)); 590 if (!lu->nprow) lu->nprow = 1; 591 while (lu->nprow > 0) { 592 lu->npcol = (int_t)(size / lu->nprow); 593 if (size == lu->nprow * lu->npcol) break; 594 lu->nprow--; 595 } 596 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 597 lu->use3d = PETSC_FALSE; 598 lu->npdep = 1; 599 #endif 600 601 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 602 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL)); 603 PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation"); 604 if (lu->use3d) { 605 PetscInt t; 606 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL)); 607 t 
= (PetscInt)PetscLog2Real((PetscReal)lu->npdep); 608 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep); 609 if (lu->npdep > 1) { 610 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep))); 611 if (!lu->nprow) lu->nprow = 1; 612 while (lu->nprow > 0) { 613 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow)); 614 if (size == lu->nprow * lu->npcol * lu->npdep) break; 615 lu->nprow--; 616 } 617 } 618 } 619 #endif 620 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL)); 621 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL)); 622 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 623 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep); 624 #else 625 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol); 626 #endif 627 /* end of adding additional options */ 628 629 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 630 if (lu->use3d) { 631 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d)); 632 if (context) { 633 context->grid3d = lu->grid3d; 634 context->use3d = lu->use3d; 635 } 636 } else { 637 #endif 638 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? 
context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 639 if (context) context->grid = lu->grid; 640 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 641 } 642 #endif 643 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 644 if (mpiflg) { 645 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 646 } else { 647 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 648 } 649 } else { /* (mpiflg && !context->busy) */ 650 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.")); 651 context->busy = PETSC_TRUE; 652 lu->grid = context->grid; 653 } 654 PetscOptionsEnd(); 655 656 /* Initialize ScalePermstruct and LUstruct. */ 657 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct)); 658 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct)); 659 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 660 F->ops->solve = MatSolve_SuperLU_DIST; 661 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 662 F->ops->getinertia = NULL; 663 664 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 665 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 666 PetscFunctionReturn(PETSC_SUCCESS); 667 } 668 669 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) 670 { 671 PetscFunctionBegin; 672 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info)); 673 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 674 PetscFunctionReturn(PETSC_SUCCESS); 675 } 676 677 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) 678 { 679 PetscFunctionBegin; 680 *type = 
MATSOLVERSUPERLU_DIST; 681 PetscFunctionReturn(PETSC_SUCCESS); 682 } 683 684 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) 685 { 686 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 687 superlu_dist_options_t options; 688 689 PetscFunctionBegin; 690 /* check if matrix is superlu_dist type */ 691 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS); 692 693 options = lu->options; 694 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n")); 695 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 696 * format spec for int64_t is set to %d for whatever reason */ 697 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 698 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 699 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep)); 700 #endif 701 702 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO])); 703 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO])); 704 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE])); 705 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 706 707 switch (options.RowPerm) { 708 case NOROWPERM: 709 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n")); 710 break; 711 case LargeDiag_MC64: 712 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n")); 713 break; 714 case LargeDiag_AWPM: 715 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n")); 716 break; 717 case MY_PERMR: 718 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n")); 719 
break; 720 default: 721 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 722 } 723 724 switch (options.ColPerm) { 725 case NATURAL: 726 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n")); 727 break; 728 case MMD_AT_PLUS_A: 729 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n")); 730 break; 731 case MMD_ATA: 732 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n")); 733 break; 734 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 735 case METIS_AT_PLUS_A: 736 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n")); 737 break; 738 case PARMETIS: 739 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n")); 740 break; 741 default: 742 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 743 } 744 745 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO])); 746 747 if (lu->FactPattern == SamePattern) { 748 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n")); 749 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 750 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n")); 751 } else if (lu->FactPattern == DOFACT) { 752 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n")); 753 } else { 754 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern"); 755 } 756 PetscFunctionReturn(PETSC_SUCCESS); 757 } 758 759 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) 760 { 761 PetscBool iascii; 762 PetscViewerFormat format; 763 764 PetscFunctionBegin; 765 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 766 if (iascii) { 767 PetscCall(PetscViewerGetFormat(viewer, &format)); 768 if (format == 
PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Create an (unfactored) factor matrix F of solver type MATSOLVERSUPERLU_DIST for A.
   Supports ftype MAT_FACTOR_LU and MAT_FACTOR_CHOLESKY (the latter handled as a
   symmetric-pattern LU, since SuperLU_DIST has no true Cholesky). */
static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F)
{
  Mat                    B;
  Mat_SuperLU_DIST      *lu;
  PetscInt               M = A->rmap->N, N = A->cmap->N;
  PetscMPIInt            size;
  superlu_dist_options_t options;

  PetscFunctionBegin;
  /* Create the factorization matrix */
  PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
  PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N));
  PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name));
  PetscCall(MatSetUp(B));
  B->ops->getinfo = MatGetInfo_External;
  B->ops->view = MatView_SuperLU_DIST;
  B->ops->destroy = MatDestroy_SuperLU_DIST;

  /* Set the default input options:
     options.Fact = DOFACT;
     options.Equil = YES;
     options.ParSymbFact = NO;
     options.ColPerm = METIS_AT_PLUS_A;
     options.RowPerm = LargeDiag_MC64;
     options.ReplaceTinyPivot = YES;
     options.IterRefine = DOUBLE;
     options.Trans = NOTRANS;
     options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve()
     options.RefineInitialized = NO;
     options.PrintStat = YES;
     options.SymPattern = NO;
  */
  set_default_options_dist(&options);

  B->trivialsymbolic = PETSC_TRUE;
  if (ftype == MAT_FACTOR_LU) {
    B->factortype = MAT_FACTOR_LU;
    B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST;
  } else {
    /* Cholesky: reuse the LU machinery but tell SuperLU_DIST the pattern is symmetric */
    B->factortype = MAT_FACTOR_CHOLESKY;
    B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST;
    options.SymPattern = YES;
  }

  /* set solvertype */
  PetscCall(PetscFree(B->solvertype));
  PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype));

  PetscCall(PetscNew(&lu));
  B->data = lu;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));

  /* copy the defaults; Fact is (re)set below so the first numeric factorization starts from scratch */
  lu->options = options;
  lu->options.Fact
= DOFACT;
  lu->matsolve_iscalled = PETSC_FALSE;
  lu->matmatsolve_iscalled = PETSC_FALSE;

  /* expose solver-specific query functions on the factor matrix */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST));

  *F = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Register the SuperLU_DIST LU and Cholesky factorizations for sequential and
   parallel AIJ matrices, and set up the MPI attribute keyval used to cache the
   SuperLU_DIST process grid on a communicator (created once, freed at PetscFinalize()). */
PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void)
{
  PetscFunctionBegin;
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
  PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist));
  /* create the keyval only once; the delete callback cleans up the cached grid when the communicator dies */
  if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) {
    PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, NULL));
    PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization

  Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST

  Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver

  Works with `MATAIJ` matrices

  Options Database Keys:
+ -mat_superlu_dist_r <n> - number of rows in processor partition
. -mat_superlu_dist_c <n> - number of columns in processor partition
. -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later
.
-mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d` is provided)
. -mat_superlu_dist_equil - equilibrate the matrix
. -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation
. -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation
. -mat_superlu_dist_replacetinypivot - replace tiny pivots
. -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT`
. -mat_superlu_dist_iterrefine - use iterative refinement
- -mat_superlu_dist_printstat - print factorization information

  Level: beginner

  Note:
    If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs.

.seealso: [](chapter_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()`
M*/