1 /* 2 Provides an interface to the SuperLU_DIST sparse solver 3 */ 4 5 #include <../src/mat/impls/aij/seq/aij.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> 7 #include <petscpkg_version.h> 8 9 EXTERN_C_BEGIN 10 #if defined(PETSC_USE_COMPLEX) 11 #define CASTDOUBLECOMPLEX (doublecomplex *) 12 #define CASTDOUBLECOMPLEXSTAR (doublecomplex **) 13 #include <superlu_zdefs.h> 14 #define LUstructInit zLUstructInit 15 #define ScalePermstructInit zScalePermstructInit 16 #define ScalePermstructFree zScalePermstructFree 17 #define LUstructFree zLUstructFree 18 #define Destroy_LU zDestroy_LU 19 #define ScalePermstruct_t zScalePermstruct_t 20 #define LUstruct_t zLUstruct_t 21 #define SOLVEstruct_t zSOLVEstruct_t 22 #define SolveFinalize zSolveFinalize 23 #define pGetDiagU pzGetDiagU 24 #define pgssvx pzgssvx 25 #define allocateA_dist zallocateA_dist 26 #define Create_CompRowLoc_Matrix_dist zCreate_CompRowLoc_Matrix_dist 27 #define SLU SLU_Z 28 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 29 #define DeAllocLlu_3d zDeAllocLlu_3d 30 #define DeAllocGlu_3d zDeAllocGlu_3d 31 #define Destroy_A3d_gathered_on_2d zDestroy_A3d_gathered_on_2d 32 #define pgssvx3d pzgssvx3d 33 #endif 34 #elif defined(PETSC_USE_REAL_SINGLE) 35 #define CASTDOUBLECOMPLEX 36 #define CASTDOUBLECOMPLEXSTAR 37 #include <superlu_sdefs.h> 38 #define LUstructInit sLUstructInit 39 #define ScalePermstructInit sScalePermstructInit 40 #define ScalePermstructFree sScalePermstructFree 41 #define LUstructFree sLUstructFree 42 #define Destroy_LU sDestroy_LU 43 #define ScalePermstruct_t sScalePermstruct_t 44 #define LUstruct_t sLUstruct_t 45 #define SOLVEstruct_t sSOLVEstruct_t 46 #define SolveFinalize sSolveFinalize 47 #define pGetDiagU psGetDiagU 48 #define pgssvx psgssvx 49 #define allocateA_dist sallocateA_dist 50 #define Create_CompRowLoc_Matrix_dist sCreate_CompRowLoc_Matrix_dist 51 #define SLU SLU_S 52 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 53 #define DeAllocLlu_3d sDeAllocLlu_3d 54 #define DeAllocGlu_3d sDeAllocGlu_3d 55 #define Destroy_A3d_gathered_on_2d sDestroy_A3d_gathered_on_2d 56 #define pgssvx3d psgssvx3d 57 #endif 58 #else 59 #define CASTDOUBLECOMPLEX 60 #define CASTDOUBLECOMPLEXSTAR 61 #include <superlu_ddefs.h> 62 #define LUstructInit dLUstructInit 63 #define ScalePermstructInit dScalePermstructInit 64 #define ScalePermstructFree dScalePermstructFree 65 #define LUstructFree dLUstructFree 66 #define Destroy_LU dDestroy_LU 67 #define ScalePermstruct_t dScalePermstruct_t 68 #define LUstruct_t dLUstruct_t 69 #define SOLVEstruct_t dSOLVEstruct_t 70 #define SolveFinalize dSolveFinalize 71 #define pGetDiagU pdGetDiagU 72 #define pgssvx pdgssvx 73 #define allocateA_dist dallocateA_dist 74 #define Create_CompRowLoc_Matrix_dist dCreate_CompRowLoc_Matrix_dist 75 #define SLU SLU_D 76 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 77 #define DeAllocLlu_3d dDeAllocLlu_3d 78 #define DeAllocGlu_3d dDeAllocGlu_3d 79 #define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d 80 #define pgssvx3d pdgssvx3d 81 #endif 82 #endif 83 EXTERN_C_END 84 85 typedef struct { 86 int_t nprow, npcol, *row, *col; 87 gridinfo_t grid; 88 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 89 PetscBool use3d; 90 int_t npdep; /* replication factor, must be power of two */ 91 gridinfo3d_t grid3d; 92 #endif 93 superlu_dist_options_t options; 94 SuperMatrix A_sup; 95 ScalePermstruct_t ScalePermstruct; 96 LUstruct_t LUstruct; 97 int StatPrint; 98 SOLVEstruct_t SOLVEstruct; 99 fact_t FactPattern; 100 MPI_Comm comm_superlu; 101 PetscScalar *val; 102 PetscBool matsolve_iscalled, matmatsolve_iscalled; 103 PetscBool CleanUpSuperLU_Dist; /* Flag to clean up (non-global) SuperLU objects during Destroy */ 104 } Mat_SuperLU_DIST; 105 106 PetscErrorCode MatSuperluDistGetDiagU_SuperLU_DIST(Mat F, PetscScalar *diagU) 107 { 108 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 109 110 PetscFunctionBegin; 111 PetscStackCallExternalVoid("SuperLU_DIST:pGetDiagU", pGetDiagU(F->rmap->N, &lu->LUstruct, &lu->grid, CASTDOUBLECOMPLEX diagU)); 112 PetscFunctionReturn(PETSC_SUCCESS); 113 } 114 115 PetscErrorCode MatSuperluDistGetDiagU(Mat F, PetscScalar *diagU) 116 { 117 PetscFunctionBegin; 118 PetscValidHeaderSpecific(F, MAT_CLASSID, 1); 119 PetscTryMethod(F, "MatSuperluDistGetDiagU_C", (Mat, PetscScalar *), (F, diagU)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 /* This allows reusing the Superlu_DIST communicator and grid when only a single SuperLU_DIST matrix is used at a time */ 124 typedef struct { 125 MPI_Comm comm; 126 PetscBool busy; 127 gridinfo_t grid; 128 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 129 PetscBool use3d; 130 gridinfo3d_t grid3d; 131 #endif 132 } PetscSuperLU_DIST; 133 static PetscMPIInt Petsc_Superlu_dist_keyval = MPI_KEYVAL_INVALID; 134 135 PETSC_EXTERN PetscMPIInt MPIAPI Petsc_Superlu_dist_keyval_Delete_Fn(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state) 136 { 137 PetscSuperLU_DIST *context = (PetscSuperLU_DIST *)attr_val; 138 139 PetscFunctionBegin; 140 if (keyval != Petsc_Superlu_dist_keyval) SETERRMPI(PETSC_COMM_SELF, PETSC_ERR_ARG_CORRUPT, "Unexpected keyval"); 141 PetscCall(PetscInfo(NULL, "Removing Petsc_Superlu_dist_keyval attribute from communicator that is being freed\n")); 142 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 143 if (context->use3d) { 144 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&context->grid3d)); 145 } else 146 #endif 147 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&context->grid)); 148 PetscCallMPI(MPI_Comm_free(&context->comm)); 149 PetscCall(PetscFree(context)); 150 PetscFunctionReturn(MPI_SUCCESS); 151 } 152 153 /* 154 Performs MPI_Comm_free_keyval() on Petsc_Superlu_dist_keyval but keeps the global variable for 155 users who do not destroy all PETSc objects before PetscFinalize(). 156 157 The value Petsc_Superlu_dist_keyval is retained so that Petsc_Superlu_dist_keyval_Delete_Fn() 158 can still check that the keyval associated with the MPI communicator is correct when the MPI 159 communicator is destroyed. 160 161 This is called in PetscFinalize() 162 */ 163 static PetscErrorCode Petsc_Superlu_dist_keyval_free(void) 164 { 165 PetscMPIInt Petsc_Superlu_dist_keyval_temp = Petsc_Superlu_dist_keyval; 166 167 PetscFunctionBegin; 168 PetscCall(PetscInfo(NULL, "Freeing Petsc_Superlu_dist_keyval\n")); 169 PetscCallMPI(MPI_Comm_free_keyval(&Petsc_Superlu_dist_keyval_temp)); 170 PetscFunctionReturn(PETSC_SUCCESS); 171 } 172 173 static PetscErrorCode MatDestroy_SuperLU_DIST(Mat A) 174 { 175 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 176 177 PetscFunctionBegin; 178 if (lu->CleanUpSuperLU_Dist) { 179 /* Deallocate SuperLU_DIST storage */ 180 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 181 if (lu->options.SolveInitialized) PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 182 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 183 if (lu->use3d) { 184 if (lu->grid3d.zscp.Iam == 0) { 185 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 186 } else { 187 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 188 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 189 } 190 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_A3d_gathered_on_2d", Destroy_A3d_gathered_on_2d(&lu->SOLVEstruct, &lu->grid3d)); 191 } else 192 #endif 193 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid, &lu->LUstruct)); 194 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructFree", ScalePermstructFree(&lu->ScalePermstruct)); 195 PetscStackCallExternalVoid("SuperLU_DIST:LUstructFree", LUstructFree(&lu->LUstruct)); 196 197 /* Release the SuperLU_DIST process grid only if the matrix has its own copy, that is it is not in the communicator context */ 198 if (lu->comm_superlu) { 199 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 200 if (lu->use3d) { 201 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit3d", superlu_gridexit3d(&lu->grid3d)); 202 } else 203 #endif 204 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridexit", superlu_gridexit(&lu->grid)); 205 } 206 } 207 /* 208 * We always need to release the communicator that was created in MatGetFactor_aij_superlu_dist. 209 * lu->CleanUpSuperLU_Dist was turned on in MatLUFactorSymbolic_SuperLU_DIST. There are some use 210 * cases where we only create a matrix but do not solve mat. In these cases, lu->CleanUpSuperLU_Dist 211 * is off, and the communicator was not released or marked as "not busy " in the old code. 212 * Here we try to release comm regardless. 213 */ 214 if (lu->comm_superlu) { 215 PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 216 } else { 217 PetscSuperLU_DIST *context; 218 MPI_Comm comm; 219 PetscMPIInt flg; 220 221 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 222 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &flg)); 223 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Communicator does not have expected Petsc_Superlu_dist_keyval attribute"); 224 context->busy = PETSC_FALSE; 225 } 226 227 PetscCall(PetscFree(A->data)); 228 /* clear composed functions */ 229 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 230 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatSuperluDistGetDiagU_C", NULL)); 231 232 PetscFunctionReturn(PETSC_SUCCESS); 233 } 234 235 static PetscErrorCode MatSolve_SuperLU_DIST(Mat A, Vec b_mpi, Vec x) 236 { 237 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 238 PetscInt m = A->rmap->n; 239 SuperLUStat_t stat; 240 PetscReal berr[1]; 241 PetscScalar *bptr = NULL; 242 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 243 static PetscBool cite = PETSC_FALSE; 244 245 PetscFunctionBegin; 246 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 247 PetscCall(PetscCitationsRegister("@article{lidemmel03,\n author = {Xiaoye S. Li and James W. Demmel},\n title = {{SuperLU_DIST}: A Scalable Distributed-Memory Sparse Direct\n Solver for Unsymmetric Linear Systems},\n journal = {ACM " 248 "Trans. Mathematical Software},\n volume = {29},\n number = {2},\n pages = {110-140},\n year = 2003\n}\n", 249 &cite)); 250 251 if (lu->options.SolveInitialized && !lu->matsolve_iscalled) { 252 /* see comments in MatMatSolve() */ 253 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 254 lu->options.SolveInitialized = NO; 255 } 256 PetscCall(VecCopy(b_mpi, x)); 257 PetscCall(VecGetArray(x, &bptr)); 258 259 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 260 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 261 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 262 else 263 #endif 264 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, 1, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 265 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 266 267 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 268 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 269 270 PetscCall(VecRestoreArray(x, &bptr)); 271 lu->matsolve_iscalled = PETSC_TRUE; 272 lu->matmatsolve_iscalled = PETSC_FALSE; 273 PetscFunctionReturn(PETSC_SUCCESS); 274 } 275 276 static PetscErrorCode MatMatSolve_SuperLU_DIST(Mat A, Mat B_mpi, Mat X) 277 { 278 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 279 PetscInt m = A->rmap->n, nrhs; 280 SuperLUStat_t stat; 281 PetscReal berr[1]; 282 PetscScalar *bptr; 283 int info; /* SuperLU_Dist info code is ALWAYS an int, even with long long indices */ 284 PetscBool flg; 285 286 PetscFunctionBegin; 287 PetscCheck(lu->options.Fact == FACTORED, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "SuperLU_DIST options.Fact must equal FACTORED"); 288 PetscCall(PetscObjectTypeCompareAny((PetscObject)B_mpi, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 289 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix B must be MATDENSE matrix"); 290 if (X != B_mpi) { 291 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &flg, MATSEQDENSE, MATMPIDENSE, NULL)); 292 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 293 } 294 295 if (lu->options.SolveInitialized && !lu->matmatsolve_iscalled) { 296 /* communication pattern of SOLVEstruct is unlikely created for matmatsolve, 297 thus destroy it and create a new SOLVEstruct. 298 Otherwise it may result in memory corruption or incorrect solution 299 See src/mat/tests/ex125.c */ 300 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 301 lu->options.SolveInitialized = NO; 302 } 303 if (X != B_mpi) PetscCall(MatCopy(B_mpi, X, SAME_NONZERO_PATTERN)); 304 305 PetscCall(MatGetSize(B_mpi, NULL, &nrhs)); 306 307 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 308 PetscCall(MatDenseGetArray(X, &bptr)); 309 310 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 311 if (lu->use3d) PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 312 else 313 #endif 314 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, CASTDOUBLECOMPLEX bptr, m, nrhs, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &info)); 315 316 PetscCheck(!info, PETSC_COMM_SELF, PETSC_ERR_LIB, "pdgssvx fails, info: %d", info); 317 PetscCall(MatDenseRestoreArray(X, &bptr)); 318 319 if (lu->options.PrintStat) PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ 320 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 321 lu->matsolve_iscalled = PETSC_FALSE; 322 lu->matmatsolve_iscalled = PETSC_TRUE; 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 /* 327 input: 328 F: numeric Cholesky factor 329 output: 330 nneg: total number of negative pivots 331 nzero: total number of zero pivots 332 npos: (global dimension of F) - nneg - nzero 333 */ 334 static PetscErrorCode MatGetInertia_SuperLU_DIST(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 335 { 336 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 337 PetscScalar *diagU = NULL; 338 PetscInt M, i, neg = 0, zero = 0, pos = 0; 339 PetscReal r; 340 341 PetscFunctionBegin; 342 PetscCheck(F->assembled, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix factor F is not assembled"); 343 PetscCheck(lu->options.RowPerm == NOROWPERM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Must set NOROWPERM"); 344 PetscCall(MatGetSize(F, &M, NULL)); 345 PetscCall(PetscMalloc1(M, &diagU)); 346 PetscCall(MatSuperluDistGetDiagU(F, diagU)); 347 for (i = 0; i < M; i++) { 348 #if defined(PETSC_USE_COMPLEX) 349 r = PetscImaginaryPart(diagU[i]) / 10.0; 350 PetscCheck(r > -PETSC_MACHINE_EPSILON && r < PETSC_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "diagU[%" PetscInt_FMT "]=%g + i %g is non-real", i, (double)PetscRealPart(diagU[i]), (double)(r * 10.0)); 351 r = PetscRealPart(diagU[i]); 352 #else 353 r = diagU[i]; 354 #endif 355 if (r > 0) { 356 pos++; 357 } else if (r < 0) { 358 neg++; 359 } else zero++; 360 } 361 362 PetscCall(PetscFree(diagU)); 363 if (nneg) *nneg = neg; 364 if (nzero) *nzero = zero; 365 if (npos) *npos = pos; 366 PetscFunctionReturn(PETSC_SUCCESS); 367 } 368 369 static PetscErrorCode MatLUFactorNumeric_SuperLU_DIST(Mat F, Mat A, const MatFactorInfo *info) 370 { 371 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 372 Mat Aloc; 373 const PetscScalar *av; 374 const PetscInt *ai = NULL, *aj = NULL; 375 PetscInt nz, dummy; 376 int sinfo; /* SuperLU_Dist info flag is always an int even with long long indices */ 377 SuperLUStat_t stat; 378 PetscReal *berr = 0; 379 PetscBool ismpiaij, isseqaij, flg; 380 381 PetscFunctionBegin; 382 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij)); 383 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &ismpiaij)); 384 if (ismpiaij) { 385 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc)); 386 } else if (isseqaij) { 387 PetscCall(PetscObjectReference((PetscObject)A)); 388 Aloc = A; 389 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 390 391 PetscCall(MatGetRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 392 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "GetRowIJ failed"); 393 PetscCall(MatSeqAIJGetArrayRead(Aloc, &av)); 394 nz = ai[Aloc->rmap->n]; 395 396 /* Allocations for A_sup */ 397 if (lu->options.Fact == DOFACT) { /* first numeric factorization */ 398 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 399 } else { /* successive numeric factorization, sparsity pattern and perm_c are reused. */ 400 if (lu->FactPattern == SamePattern_SameRowPerm) { 401 lu->options.Fact = SamePattern_SameRowPerm; /* matrix has similar numerical values */ 402 } else if (lu->FactPattern == SamePattern) { 403 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 404 if (lu->use3d) { 405 if (lu->grid3d.zscp.Iam == 0) { 406 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->cmap->N, &lu->grid3d.grid2d, &lu->LUstruct)); 407 PetscStackCallExternalVoid("SuperLU_DIST:SolveFinalize", SolveFinalize(&lu->options, &lu->SOLVEstruct)); 408 } else { 409 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocLlu_3d", DeAllocLlu_3d(lu->A_sup.ncol, &lu->LUstruct, &lu->grid3d)); 410 PetscStackCallExternalVoid("SuperLU_DIST:DeAllocGlu_3d", DeAllocGlu_3d(&lu->LUstruct)); 411 } 412 } else 413 #endif 414 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 415 lu->options.Fact = SamePattern; 416 } else if (lu->FactPattern == DOFACT) { 417 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_CompRowLoc_Matrix_dist", Destroy_CompRowLoc_Matrix_dist(&lu->A_sup)); 418 PetscStackCallExternalVoid("SuperLU_DIST:Destroy_LU", Destroy_LU(A->rmap->N, &lu->grid, &lu->LUstruct)); 419 lu->options.Fact = DOFACT; 420 PetscStackCallExternalVoid("SuperLU_DIST:allocateA_dist", allocateA_dist(Aloc->rmap->n, nz, CASTDOUBLECOMPLEXSTAR & lu->val, &lu->col, &lu->row)); 421 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "options.Fact must be one of SamePattern SamePattern_SameRowPerm DOFACT"); 422 } 423 424 /* Copy AIJ matrix to superlu_dist matrix */ 425 PetscCall(PetscArraycpy(lu->row, ai, Aloc->rmap->n + 1)); 426 PetscCall(PetscArraycpy(lu->col, aj, nz)); 427 PetscCall(PetscArraycpy(lu->val, av, nz)); 428 PetscCall(MatRestoreRowIJ(Aloc, 0, PETSC_FALSE, PETSC_FALSE, &dummy, &ai, &aj, &flg)); 429 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_SUP, "RestoreRowIJ failed"); 430 PetscCall(MatSeqAIJRestoreArrayRead(Aloc, &av)); 431 PetscCall(MatDestroy(&Aloc)); 432 433 /* Create and setup A_sup */ 434 if (lu->options.Fact == DOFACT) { 435 PetscStackCallExternalVoid("SuperLU_DIST:Create_CompRowLoc_Matrix_dist", Create_CompRowLoc_Matrix_dist(&lu->A_sup, A->rmap->N, A->cmap->N, nz, A->rmap->n, A->rmap->rstart, CASTDOUBLECOMPLEX lu->val, lu->col, lu->row, SLU_NR_loc, SLU, SLU_GE)); 436 } 437 438 /* Factor the matrix. */ 439 PetscStackCallExternalVoid("SuperLU_DIST:PStatInit", PStatInit(&stat)); /* Initialize the statistics variables. */ 440 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) && !PetscDefined(MISSING_GETLINE) 441 if (lu->use3d) { 442 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx3d", pgssvx3d(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid3d, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 443 } else 444 #endif 445 PetscStackCallExternalVoid("SuperLU_DIST:pgssvx", pgssvx(&lu->options, &lu->A_sup, &lu->ScalePermstruct, 0, A->rmap->n, 0, &lu->grid, &lu->LUstruct, &lu->SOLVEstruct, berr, &stat, &sinfo)); 446 if (sinfo > 0) { 447 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_MAT_LU_ZRPVT, "Zero pivot in row %d", sinfo); 448 if (sinfo <= lu->A_sup.ncol) { 449 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 450 PetscCall(PetscInfo(F, "U(i,i) is exactly zero, i= %d\n", sinfo)); 451 } else if (sinfo > lu->A_sup.ncol) { 452 /* 453 number of bytes allocated when memory allocation 454 failure occurred, plus A->ncol. 455 */ 456 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 457 PetscCall(PetscInfo(F, "Number of bytes allocated when memory allocation fails %d\n", sinfo)); 458 } 459 } else PetscCheck(sinfo >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "info = %d, argument in p*gssvx() had an illegal value", sinfo); 460 461 if (lu->options.PrintStat) { PetscStackCallExternalVoid("SuperLU_DIST:PStatPrint", PStatPrint(&lu->options, &stat, &lu->grid)); /* Print the statistics. */ } 462 PetscStackCallExternalVoid("SuperLU_DIST:PStatFree", PStatFree(&stat)); 463 F->assembled = PETSC_TRUE; 464 F->preallocated = PETSC_TRUE; 465 lu->options.Fact = FACTORED; /* The factored form of A is supplied. Local option used by this func. only */ 466 PetscFunctionReturn(PETSC_SUCCESS); 467 } 468 469 /* Note the Petsc r and c permutations are ignored */ 470 static PetscErrorCode MatLUFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, IS c, const MatFactorInfo *info) 471 { 472 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)F->data; 473 PetscInt M = A->rmap->N, N = A->cmap->N, indx; 474 PetscMPIInt size, mpiflg; 475 PetscBool flg, set; 476 const char *colperm[] = {"NATURAL", "MMD_AT_PLUS_A", "MMD_ATA", "METIS_AT_PLUS_A", "PARMETIS"}; 477 const char *rowperm[] = {"NOROWPERM", "LargeDiag_MC64", "LargeDiag_AWPM", "MY_PERMR"}; 478 const char *factPattern[] = {"SamePattern", "SamePattern_SameRowPerm", "DOFACT"}; 479 MPI_Comm comm; 480 PetscSuperLU_DIST *context = NULL; 481 482 PetscFunctionBegin; 483 /* Set options to F */ 484 PetscCall(PetscObjectGetComm((PetscObject)F, &comm)); 485 PetscCallMPI(MPI_Comm_size(comm, &size)); 486 487 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "SuperLU_Dist Options", "Mat"); 488 PetscCall(PetscOptionsBool("-mat_superlu_dist_equil", "Equilibrate matrix", "None", lu->options.Equil ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 489 if (set && !flg) lu->options.Equil = NO; 490 491 PetscCall(PetscOptionsEList("-mat_superlu_dist_rowperm", "Row permutation", "None", rowperm, 4, rowperm[1], &indx, &flg)); 492 if (flg) { 493 switch (indx) { 494 case 0: 495 lu->options.RowPerm = NOROWPERM; 496 break; 497 case 1: 498 lu->options.RowPerm = LargeDiag_MC64; 499 break; 500 case 2: 501 lu->options.RowPerm = LargeDiag_AWPM; 502 break; 503 case 3: 504 lu->options.RowPerm = MY_PERMR; 505 break; 506 default: 507 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown row permutation"); 508 } 509 } 510 511 PetscCall(PetscOptionsEList("-mat_superlu_dist_colperm", "Column permutation", "None", colperm, 5, colperm[3], &indx, &flg)); 512 if (flg) { 513 switch (indx) { 514 case 0: 515 lu->options.ColPerm = NATURAL; 516 break; 517 case 1: 518 lu->options.ColPerm = MMD_AT_PLUS_A; 519 break; 520 case 2: 521 lu->options.ColPerm = MMD_ATA; 522 break; 523 case 3: 524 lu->options.ColPerm = METIS_AT_PLUS_A; 525 break; 526 case 4: 527 lu->options.ColPerm = PARMETIS; /* only works for np>1 */ 528 break; 529 default: 530 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 531 } 532 } 533 534 lu->options.ReplaceTinyPivot = NO; 535 PetscCall(PetscOptionsBool("-mat_superlu_dist_replacetinypivot", "Replace tiny pivots", "None", lu->options.ReplaceTinyPivot ? PETSC_TRUE : PETSC_FALSE, &flg, &set)); 536 if (set && flg) lu->options.ReplaceTinyPivot = YES; 537 538 lu->options.ParSymbFact = NO; 539 PetscCall(PetscOptionsBool("-mat_superlu_dist_parsymbfact", "Parallel symbolic factorization", "None", PETSC_FALSE, &flg, &set)); 540 if (set && flg && size > 1) { 541 #if defined(PETSC_HAVE_PARMETIS) 542 lu->options.ParSymbFact = YES; 543 lu->options.ColPerm = PARMETIS; /* in v2.2, PARMETIS is forced for ParSymbFact regardless of user ordering setting */ 544 #else 545 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "parsymbfact needs PARMETIS"); 546 #endif 547 } 548 549 lu->FactPattern = SamePattern; 550 PetscCall(PetscOptionsEList("-mat_superlu_dist_fact", "Sparsity pattern for repeated matrix factorization", "None", factPattern, 3, factPattern[0], &indx, &flg)); 551 if (flg) { 552 switch (indx) { 553 case 0: 554 lu->FactPattern = SamePattern; 555 break; 556 case 1: 557 lu->FactPattern = SamePattern_SameRowPerm; 558 break; 559 case 2: 560 lu->FactPattern = DOFACT; 561 break; 562 } 563 } 564 565 lu->options.IterRefine = NOREFINE; 566 PetscCall(PetscOptionsBool("-mat_superlu_dist_iterrefine", "Use iterative refinement", "None", lu->options.IterRefine == NOREFINE ? PETSC_FALSE : PETSC_TRUE, &flg, &set)); 567 if (set) { 568 if (flg) lu->options.IterRefine = SLU_DOUBLE; 569 else lu->options.IterRefine = NOREFINE; 570 } 571 572 if (PetscLogPrintInfo) lu->options.PrintStat = YES; 573 else lu->options.PrintStat = NO; 574 PetscCall(PetscOptionsBool("-mat_superlu_dist_statprint", "Print factorization information", "None", (PetscBool)lu->options.PrintStat, (PetscBool *)&lu->options.PrintStat, NULL)); 575 576 /* Additional options for special cases */ 577 if (Petsc_Superlu_dist_keyval == MPI_KEYVAL_INVALID) { 578 PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_Superlu_dist_keyval_Delete_Fn, &Petsc_Superlu_dist_keyval, (void *)0)); 579 PetscCall(PetscRegisterFinalize(Petsc_Superlu_dist_keyval_free)); 580 } 581 PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Superlu_dist_keyval, &context, &mpiflg)); 582 if (!mpiflg || context->busy) { /* additional options */ 583 if (!mpiflg) { 584 PetscCall(PetscNew(&context)); 585 context->busy = PETSC_TRUE; 586 PetscCallMPI(MPI_Comm_dup(comm, &context->comm)); 587 PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Superlu_dist_keyval, context)); 588 } else { 589 PetscCall(PetscCommGetComm(PetscObjectComm((PetscObject)A), &lu->comm_superlu)); 590 } 591 592 /* Default number of process columns and rows */ 593 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)size)); 594 if (!lu->nprow) lu->nprow = 1; 595 while (lu->nprow > 0) { 596 lu->npcol = (int_t)(size / lu->nprow); 597 if (size == lu->nprow * lu->npcol) break; 598 lu->nprow--; 599 } 600 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 601 lu->use3d = PETSC_FALSE; 602 lu->npdep = 1; 603 #endif 604 605 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 606 PetscCall(PetscOptionsBool("-mat_superlu_dist_3d", "Use SuperLU_DIST 3D distribution", "None", lu->use3d, &lu->use3d, NULL)); 607 PetscCheck(!PetscDefined(MISSING_GETLINE) || !lu->use3d, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP_SYS, "-mat_superlu_dist_3d requires a system with a getline() implementation"); 608 if (lu->use3d) { 609 PetscInt t; 610 PetscCall(PetscOptionsInt("-mat_superlu_dist_d", "Number of z entries in processor partition", "None", lu->npdep, (PetscInt *)&lu->npdep, NULL)); 611 t = (PetscInt)PetscLog2Real((PetscReal)lu->npdep); 612 PetscCheck(PetscPowInt(2, t) == lu->npdep, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_OUTOFRANGE, "-mat_superlu_dist_d %lld must be a power of 2", (long long)lu->npdep); 613 if (lu->npdep > 1) { 614 lu->nprow = (int_t)(0.5 + PetscSqrtReal((PetscReal)(size / lu->npdep))); 615 if (!lu->nprow) lu->nprow = 1; 616 while (lu->nprow > 0) { 617 lu->npcol = (int_t)(size / (lu->npdep * lu->nprow)); 618 if (size == lu->nprow * lu->npcol * lu->npdep) break; 619 lu->nprow--; 620 } 621 } 622 } 623 #endif 624 PetscCall(PetscOptionsInt("-mat_superlu_dist_r", "Number rows in processor partition", "None", lu->nprow, (PetscInt *)&lu->nprow, NULL)); 625 PetscCall(PetscOptionsInt("-mat_superlu_dist_c", "Number columns in processor partition", "None", lu->npcol, (PetscInt *)&lu->npcol, NULL)); 626 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 627 PetscCheck(size == lu->nprow * lu->npcol * lu->npdep, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld * npdep %lld", size, (long long)lu->nprow, (long long)lu->npcol, (long long)lu->npdep); 628 #else 629 PetscCheck(size == lu->nprow * lu->npcol, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Number of processes %d must equal to nprow %lld * npcol %lld", size, (long long)lu->nprow, (long long)lu->npcol); 630 #endif 631 /* end of adding additional options */ 632 633 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 634 if (lu->use3d) { 635 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit3d", superlu_gridinit3d(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, lu->npdep, &lu->grid3d)); 636 if (context) { 637 context->grid3d = lu->grid3d; 638 context->use3d = lu->use3d; 639 } 640 } else { 641 #endif 642 PetscStackCallExternalVoid("SuperLU_DIST:superlu_gridinit", superlu_gridinit(context ? context->comm : lu->comm_superlu, lu->nprow, lu->npcol, &lu->grid)); 643 if (context) context->grid = lu->grid; 644 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 645 } 646 #endif 647 PetscCall(PetscInfo(NULL, "Duplicating a communicator for SuperLU_DIST and calling superlu_gridinit()\n")); 648 if (mpiflg) { 649 PetscCall(PetscInfo(NULL, "Communicator attribute already in use so not saving communicator and SuperLU_DIST grid in communicator attribute \n")); 650 } else { 651 PetscCall(PetscInfo(NULL, "Storing communicator and SuperLU_DIST grid in communicator attribute\n")); 652 } 653 } else { /* (mpiflg && !context->busy) */ 654 PetscCall(PetscInfo(NULL, "Reusing communicator and superlu_gridinit() for SuperLU_DIST from communicator attribute.")); 655 context->busy = PETSC_TRUE; 656 lu->grid = context->grid; 657 } 658 PetscOptionsEnd(); 659 660 /* Initialize ScalePermstruct and LUstruct. */ 661 PetscStackCallExternalVoid("SuperLU_DIST:ScalePermstructInit", ScalePermstructInit(M, N, &lu->ScalePermstruct)); 662 PetscStackCallExternalVoid("SuperLU_DIST:LUstructInit", LUstructInit(N, &lu->LUstruct)); 663 F->ops->lufactornumeric = MatLUFactorNumeric_SuperLU_DIST; 664 F->ops->solve = MatSolve_SuperLU_DIST; 665 F->ops->matsolve = MatMatSolve_SuperLU_DIST; 666 F->ops->getinertia = NULL; 667 668 if (A->symmetric == PETSC_BOOL3_TRUE || A->hermitian == PETSC_BOOL3_TRUE) F->ops->getinertia = MatGetInertia_SuperLU_DIST; 669 lu->CleanUpSuperLU_Dist = PETSC_TRUE; 670 PetscFunctionReturn(PETSC_SUCCESS); 671 } 672 673 static PetscErrorCode MatCholeskyFactorSymbolic_SuperLU_DIST(Mat F, Mat A, IS r, const MatFactorInfo *info) 674 { 675 PetscFunctionBegin; 676 PetscCall(MatLUFactorSymbolic_SuperLU_DIST(F, A, r, r, info)); 677 F->ops->choleskyfactornumeric = MatLUFactorNumeric_SuperLU_DIST; 678 PetscFunctionReturn(PETSC_SUCCESS); 679 } 680 681 static PetscErrorCode MatFactorGetSolverType_aij_superlu_dist(Mat A, MatSolverType *type) 682 { 683 PetscFunctionBegin; 684 *type = MATSOLVERSUPERLU_DIST; 685 PetscFunctionReturn(PETSC_SUCCESS); 686 } 687 688 static PetscErrorCode MatView_Info_SuperLU_DIST(Mat A, PetscViewer viewer) 689 { 690 Mat_SuperLU_DIST *lu = (Mat_SuperLU_DIST *)A->data; 691 superlu_dist_options_t options; 692 693 PetscFunctionBegin; 694 /* check if matrix is superlu_dist type */ 695 if (A->ops->solve != MatSolve_SuperLU_DIST) PetscFunctionReturn(PETSC_SUCCESS); 696 697 options = lu->options; 698 PetscCall(PetscViewerASCIIPrintf(viewer, "SuperLU_DIST run parameters:\n")); 699 /* would love to use superlu 'IFMT' macro but it looks like it's inconsistently applied, the 700 * format spec for int64_t is set to %d for whatever reason */ 701 PetscCall(PetscViewerASCIIPrintf(viewer, " Process grid nprow %lld x npcol %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 702 #if PETSC_PKG_SUPERLU_DIST_VERSION_GE(7, 2, 0) 703 if (lu->use3d) PetscCall(PetscViewerASCIIPrintf(viewer, " Using 3d decomposition with npdep %lld \n", (long long)lu->npdep)); 704 #endif 705 706 PetscCall(PetscViewerASCIIPrintf(viewer, " Equilibrate matrix %s \n", PetscBools[options.Equil != NO])); 707 PetscCall(PetscViewerASCIIPrintf(viewer, " Replace tiny pivots %s \n", PetscBools[options.ReplaceTinyPivot != NO])); 708 PetscCall(PetscViewerASCIIPrintf(viewer, " Use iterative refinement %s \n", PetscBools[options.IterRefine == SLU_DOUBLE])); 709 PetscCall(PetscViewerASCIIPrintf(viewer, " Processors in row %lld col partition %lld \n", (long long)lu->nprow, (long long)lu->npcol)); 710 711 switch (options.RowPerm) { 712 case NOROWPERM: 713 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation NOROWPERM\n")); 714 break; 715 case LargeDiag_MC64: 716 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_MC64\n")); 717 break; 718 case LargeDiag_AWPM: 719 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation LargeDiag_AWPM\n")); 720 break; 721 case MY_PERMR: 722 PetscCall(PetscViewerASCIIPrintf(viewer, " Row permutation MY_PERMR\n")); 723 break; 724 default: 725 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 726 } 727 728 switch (options.ColPerm) { 729 case NATURAL: 730 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation NATURAL\n")); 731 break; 732 case MMD_AT_PLUS_A: 733 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_AT_PLUS_A\n")); 734 break; 735 case MMD_ATA: 736 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation MMD_ATA\n")); 737 break; 738 /* Even though this is called METIS, the SuperLU_DIST code sets this by default if PARMETIS is defined, not METIS */ 739 case METIS_AT_PLUS_A: 740 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation METIS_AT_PLUS_A\n")); 741 break; 742 case PARMETIS: 743 PetscCall(PetscViewerASCIIPrintf(viewer, " Column permutation PARMETIS\n")); 744 break; 745 default: 746 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown column permutation"); 747 } 748 749 PetscCall(PetscViewerASCIIPrintf(viewer, " Parallel symbolic factorization %s \n", PetscBools[options.ParSymbFact != NO])); 750 751 if (lu->FactPattern == SamePattern) { 752 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern\n")); 753 } else if (lu->FactPattern == SamePattern_SameRowPerm) { 754 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization SamePattern_SameRowPerm\n")); 755 } else if (lu->FactPattern == DOFACT) { 756 PetscCall(PetscViewerASCIIPrintf(viewer, " Repeated factorization DOFACT\n")); 757 } else { 758 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Unknown factorization pattern"); 759 } 760 PetscFunctionReturn(PETSC_SUCCESS); 761 } 762 763 static PetscErrorCode MatView_SuperLU_DIST(Mat A, PetscViewer viewer) 764 { 765 PetscBool iascii; 766 PetscViewerFormat format; 767 768 PetscFunctionBegin; 769 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 770 if (iascii) { 771 PetscCall(PetscViewerGetFormat(viewer, &format)); 772 if (format == PETSC_VIEWER_ASCII_INFO) PetscCall(MatView_Info_SuperLU_DIST(A, viewer)); 773 } 774 PetscFunctionReturn(PETSC_SUCCESS); 775 } 776 777 static PetscErrorCode MatGetFactor_aij_superlu_dist(Mat A, MatFactorType ftype, Mat *F) 778 { 779 Mat B; 780 Mat_SuperLU_DIST *lu; 781 PetscInt M = A->rmap->N, N = A->cmap->N; 782 PetscMPIInt size; 783 superlu_dist_options_t options; 784 785 PetscFunctionBegin; 786 /* Create the factorization matrix */ 787 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 788 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, M, N)); 789 PetscCall(PetscStrallocpy("superlu_dist", &((PetscObject)B)->type_name)); 790 PetscCall(MatSetUp(B)); 791 B->ops->getinfo = MatGetInfo_External; 792 B->ops->view = MatView_SuperLU_DIST; 793 B->ops->destroy = MatDestroy_SuperLU_DIST; 794 795 /* Set the default input options: 796 options.Fact = DOFACT; 797 options.Equil = YES; 798 options.ParSymbFact = NO; 799 options.ColPerm = METIS_AT_PLUS_A; 800 options.RowPerm = LargeDiag_MC64; 801 options.ReplaceTinyPivot = YES; 802 options.IterRefine = DOUBLE; 803 options.Trans = NOTRANS; 804 options.SolveInitialized = NO; -hold the communication pattern used MatSolve() and MatMatSolve() 805 options.RefineInitialized = NO; 806 options.PrintStat = YES; 807 options.SymPattern = NO; 808 */ 809 set_default_options_dist(&options); 810 811 B->trivialsymbolic = PETSC_TRUE; 812 if (ftype == MAT_FACTOR_LU) { 813 B->factortype = MAT_FACTOR_LU; 814 B->ops->lufactorsymbolic = MatLUFactorSymbolic_SuperLU_DIST; 815 } else { 816 B->factortype = MAT_FACTOR_CHOLESKY; 817 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_SuperLU_DIST; 818 options.SymPattern = YES; 819 } 820 821 /* set solvertype */ 822 PetscCall(PetscFree(B->solvertype)); 823 PetscCall(PetscStrallocpy(MATSOLVERSUPERLU_DIST, &B->solvertype)); 824 825 PetscCall(PetscNew(&lu)); 826 B->data = lu; 827 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 828 829 lu->options = options; 830 lu->options.Fact = DOFACT; 831 lu->matsolve_iscalled = PETSC_FALSE; 832 lu->matmatsolve_iscalled = PETSC_FALSE; 833 834 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_aij_superlu_dist)); 835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSuperluDistGetDiagU_C", MatSuperluDistGetDiagU_SuperLU_DIST)); 836 837 *F = B; 838 PetscFunctionReturn(PETSC_SUCCESS); 839 } 840 841 PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_SuperLU_DIST(void) 842 { 843 PetscFunctionBegin; 844 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 845 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_superlu_dist)); 846 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 847 PetscCall(MatSolverTypeRegister(MATSOLVERSUPERLU_DIST, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_superlu_dist)); 848 PetscFunctionReturn(PETSC_SUCCESS); 849 } 850 851 /*MC 852 MATSOLVERSUPERLU_DIST - Parallel direct solver package for LU factorization 853 854 Use `./configure --download-superlu_dist --download-parmetis --download-metis --download-ptscotch` to have PETSc installed with SuperLU_DIST 855 856 Use `-pc_type lu` `-pc_factor_mat_solver_type superlu_dist` to use this direct solver 857 858 Works with `MATAIJ` matrices 859 860 Options Database Keys: 861 + -mat_superlu_dist_r <n> - number of rows in processor partition 862 . -mat_superlu_dist_c <n> - number of columns in processor partition 863 . -mat_superlu_dist_3d - use 3d partition, requires SuperLU_DIST 7.2 or later 864 . -mat_superlu_dist_d <n> - depth in 3d partition (valid only if `-mat_superlu_dist_3d`) is provided 865 . -mat_superlu_dist_equil - equilibrate the matrix 866 . -mat_superlu_dist_rowperm <NOROWPERM,LargeDiag_MC64,LargeDiag_AWPM,MY_PERMR> - row permutation 867 . -mat_superlu_dist_colperm <NATURAL,MMD_AT_PLUS_A,MMD_ATA,METIS_AT_PLUS_A,PARMETIS> - column permutation 868 . -mat_superlu_dist_replacetinypivot - replace tiny pivots 869 . -mat_superlu_dist_fact <SamePattern> - (choose one of) `SamePattern`, `SamePattern_SameRowPerm`, `DOFACT` 870 . -mat_superlu_dist_iterrefine - use iterative refinement 871 - -mat_superlu_dist_statprint - print factorization information 872 873 Level: beginner 874 875 Note: 876 If PETSc was configured with `--with-cuda` then this solver will automatically use the GPUs. 877 878 .seealso: [](chapter_matrices), `Mat`, `PCLU`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatGetFactor()` 879 M*/ 880