/*
  Provides an interface to the MUMPS sparse solver
*/
#include <petscpkg_version.h>
#include <petscsf.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sell/mpi/mpisell.h>
#include <petsc/private/vecimpl.h>

#define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")"

EXTERN_C_BEGIN
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #include <cmumps_c.h>
  #include <zmumps_c.h>
  #include <smumps_c.h>
  #include <dmumps_c.h>
#else
  #if defined(PETSC_USE_COMPLEX)
    #if defined(PETSC_USE_REAL_SINGLE)
      #include <cmumps_c.h>
      #define MUMPS_c       cmumps_c
      #define MUMPS_STRUC_C CMUMPS_STRUC_C
      #define MumpsScalar   CMUMPS_COMPLEX
    #else
      #include <zmumps_c.h>
      #define MUMPS_c       zmumps_c
      #define MUMPS_STRUC_C ZMUMPS_STRUC_C
      #define MumpsScalar   ZMUMPS_COMPLEX
    #endif
  #else
    #if defined(PETSC_USE_REAL_SINGLE)
      #include <smumps_c.h>
      #define MUMPS_c       smumps_c
      #define MUMPS_STRUC_C SMUMPS_STRUC_C
      #define MumpsScalar   SMUMPS_REAL
    #else
      #include <dmumps_c.h>
      #define MUMPS_c       dmumps_c
      #define MUMPS_STRUC_C DMUMPS_STRUC_C
      #define MumpsScalar   DMUMPS_REAL
    #endif
  #endif
#endif
EXTERN_C_END

#define JOB_INIT         -1
#define JOB_NULL         0
#define JOB_FACTSYMBOLIC 1
#define JOB_FACTNUMERIC  2
#define JOB_SOLVE        3
#define JOB_END          -2

/* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn and irn_loc/jcn_loc, and uses int64_t for
   numbers of nonzeros such as nnz and nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the
   naming convention of PetscMPIInt, PetscBLASInt, etc.
*/
typedef MUMPS_INT PetscMUMPSInt;

#if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0)
  #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in the MUMPS headers if MUMPS is built in full 64-bit mode, therefore the macro is more reliable */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#else
  #if defined(INTSIZE64) /* INTSIZE64 is a command line macro used to build MUMPS in full 64-bit mode */
    #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out"
  #endif
#endif

#define MPIU_MUMPSINT       MPI_INT
#define PETSC_MUMPS_INT_MAX 2147483647
#define PETSC_MUMPS_INT_MIN -2147483648

/* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> holds row/col indices or other small integers */
static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b)
{
  PetscFunctionBegin;
#if PetscDefined(USE_64BIT_INDICES)
  PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
#endif
  *b = (PetscMUMPSInt)a;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Put these utility routines here since they are only used in this file */
static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub)
{
  PetscInt  myval;
  PetscBool myset;

  PetscFunctionBegin;
  /* PetscInt's size is always >= PetscMUMPSInt's, so it is safe to call PetscOptionsInt_Private() to read a PetscMUMPSInt */
  PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub));
  if (myset) PetscCall(PetscMUMPSIntCast(myval, value));
  if (set) *set = myset;
  PetscFunctionReturn(PETSC_SUCCESS);
}
#define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX)
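/* Usage sketch (illustrative, not code from this file): reading a MUMPS control parameter into a
   PetscMUMPSInt inside an options loop where PetscOptionsObject is in scope, e.g., in
   MatSetFromOptions_MUMPS(). The option name below is the real -mat_mumps_icntl_14 option;
   the variable names are made up for the example.

     PetscMUMPSInt icntl14 = 20;
     PetscBool     set;
     PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", icntl14, &icntl14, &set));
*/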
// An abstract type for the specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C.
//
// With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type.
// Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed-size
// arrays within the types are directly linked. In the end, we only need to copy ~20 integers/pointers, which is affordable. See PreMumpsCall()/PostMumpsCall().
//
// Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface.
// Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are declared as void *. Their type will be determined by the actual precision to be used.
// Also note that we added some *_len fields, not in the specific types, to track the sizes of the MumpsScalar buffers.
typedef struct {
  PetscPrecision precision;   // precision used by MUMPS
  void          *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is decided by the precision and PETSc's use of complex

  // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions
  MUMPS_INT  sym, par, job;
  MUMPS_INT  comm_fortran; /* Fortran communicator */
  MUMPS_INT *icntl;
  void      *cntl; // MumpsReal, fixed size array
  MUMPS_INT  n;
  MUMPS_INT  nblk;

  /* Assembled entry */
  MUMPS_INT8 nnz;
  MUMPS_INT *irn;
  MUMPS_INT *jcn;
  void      *a; // MumpsScalar, centralized input
  PetscCount a_len;

  /* Distributed entry */
  MUMPS_INT8 nnz_loc;
  MUMPS_INT *irn_loc;
  MUMPS_INT *jcn_loc;
  void      *a_loc; // MumpsScalar, distributed input
  PetscCount a_loc_len;

  /* Matrix by blocks */
  MUMPS_INT *blkptr;
  MUMPS_INT *blkvar;

  /* Ordering, if given by user */
  MUMPS_INT *perm_in;

  /* RHS, solution, output data and statistics */
  void      *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc;                 // MumpsScalar buffers
  PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar

  MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc;
  MUMPS_INT  nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc;
  // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; it would cause compile errors with the widely used 5.6. If you add it, you must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0)
  MUMPS_INT  schur_lld;
  MUMPS_INT *info, *infog;   // fixed size array
  void      *rinfo, *rinfog; // MumpsReal, fixed size array

  /* Null space */
  MUMPS_INT *pivnul_list; // allocated by MUMPS!
  MUMPS_INT *mapping;     // allocated by MUMPS!

  /* Schur */
  MUMPS_INT  size_schur;
  MUMPS_INT *listvar_schur;
  void      *schur; // MumpsScalar
  PetscCount schur_len;

  /* For out-of-core */
  char *ooc_tmpdir; // fixed size array
  char *ooc_prefix; // fixed size array
} XMUMPS_STRUC_C;
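/* Concrete illustration of the aliasing scheme above (a sketch, not extra machinery): after
   MatMumpsAllocateInternalID() below, a statement like

     mumps->id.ICNTL(7) = 2;

   writes directly into the inner {S,D,C,Z}MUMPS_STRUC_C, because the outer icntl pointer aliases
   the inner fixed-size array, while scalar fields such as id.nrhs are copied into the inner
   struct by PreMumpsCall() just before MUMPS is invoked. */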
// Note: the fixed-size arrays are part of the inner MUMPS struct allocated here; point the outer struct's fields at them
#define AllocateInternalID(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner; \
    PetscCall(PetscNew(&inner)); \
    outer->icntl      = inner->icntl; \
    outer->cntl       = inner->cntl; \
    outer->info       = inner->info; \
    outer->infog      = inner->infog; \
    outer->rinfo      = inner->rinfo; \
    outer->rinfog     = inner->rinfog; \
    outer->ooc_tmpdir = inner->ooc_tmpdir; \
    outer->ooc_prefix = inner->ooc_prefix; \
    /* these three fields should never change after init */ \
    inner->comm_fortran = outer->comm_fortran; \
    inner->par          = outer->par; \
    inner->sym          = outer->sym; \
    outer->internal_id  = inner; \
  } while (0)

// Allocate the internal [SDCZ]MUMPS_STRUC_C id data structure in the given <precision>, and link fields of the outer and the inner
static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision)
{
  PetscFunctionBegin;
  outer->precision = precision;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(CMUMPS_STRUC_C, outer);
  else AllocateInternalID(ZMUMPS_STRUC_C, outer);
  #else
  if (precision == PETSC_PRECISION_SINGLE) AllocateInternalID(SMUMPS_STRUC_C, outer);
  else AllocateInternalID(DMUMPS_STRUC_C, outer);
  #endif
#else
  AllocateInternalID(MUMPS_STRUC_C, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define FreeInternalIDFields(MUMPS_STRUC_T, outer) \
  do { \
    MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \
    PetscCall(PetscFree(inner->a)); \
    PetscCall(PetscFree(inner->a_loc)); \
    PetscCall(PetscFree(inner->redrhs)); \
    PetscCall(PetscFree(inner->rhs)); \
    PetscCall(PetscFree(inner->rhs_sparse)); \
    PetscCall(PetscFree(inner->rhs_loc)); \
    PetscCall(PetscFree(inner->sol_loc)); \
    PetscCall(PetscFree(inner->schur)); \
  } while (0)

static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
  if (outer->internal_id) { // sometimes the inner id is never created before we destroy the outer
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
    const PetscPrecision mumps_precision = outer->precision;
    if (mumps_precision != PETSC_SCALAR_PRECISION) { // free the internal buffers if we used mixed precision
  #if defined(PETSC_USE_COMPLEX)
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(ZMUMPS_STRUC_C, outer);
  #else
      if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer);
      else FreeInternalIDFields(DMUMPS_STRUC_C, outer);
  #endif
    }
#endif
    PetscCall(PetscFree(outer->internal_id));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
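/* Lifecycle sketch (assumed, based on how these helpers are used in this file): the inner id is
   created once, before the JOB_INIT call, and freed when the factor Mat is destroyed:

     PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision)); // before id.job = JOB_INIT
     ...                                                           // factor, solve, ...
     PetscCall(MatMumpsFreeInternalID(&mumps->id));                // at MatDestroy_MUMPS() time
*/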
// Make a companion MumpsScalar array (for a given PetscScalar array) that holds at least <n> MumpsScalars in the given <precision>, and return its address at <ma>.
// <convert> indicates whether we need to convert the PetscScalars to MumpsScalars after allocating the MumpsScalar array.
// (For brevity, we use <ma> for the array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>)
// If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request.
//
// The returned array is a companion, so how it is created depends on whether PetscScalar and MumpsScalar are the same.
// 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output.
// 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output.
//
// Input parameters:
// + convert   - whether to do PetscScalar to MumpsScalar conversion
// . n         - length of the PetscScalar array
// . pa        - [n], points to the PetscScalar array
// . precision - precision of MumpsScalar
// . m         - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero
// - ma        - on input, an existing MumpsScalar array if any
//
// Output parameters:
// + m  - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0
// - ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same
//
// Note:
// New memory, if allocated, is done via PetscMalloc1(), and is owned by the caller.
static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  const PetscPrecision mumps_precision = precision;

  PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precision (%d). Must be single or double", (int)precision);
  #if defined(PETSC_USE_COMPLEX)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      if (*m < n) { // the existing buffer, if any, is too small
        PetscCall(PetscFree(*ma));
        PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma));
        *m = n;
      }
      if (convert) {
        CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma;
        for (PetscCount i = 0; i < n; i++) {
          b[i].r = PetscRealPart(pa[i]);
          b[i].i = PetscImaginaryPart(pa[i]);
        }
      }
    } else {
      if (*m < n) {
        PetscCall(PetscFree(*ma));
        PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma));
        *m = n;
      }
      if (convert) {
        ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma;
        for (PetscCount i = 0; i < n; i++) {
          b[i].r = PetscRealPart(pa[i]);
          b[i].i = PetscImaginaryPart(pa[i]);
        }
      }
    }
  }
  #else
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      if (*m < n) {
        PetscCall(PetscFree(*ma));
        PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma));
        *m = n;
      }
      if (convert) {
        SMUMPS_REAL *b = *(SMUMPS_REAL **)ma;
        for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
      }
    } else {
      if (*m < n) {
        PetscCall(PetscFree(*ma));
        PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma));
        *m = n;
      }
      if (convert) {
        DMUMPS_REAL *b = *(DMUMPS_REAL **)ma;
        for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
      }
    }
  }
  #endif
  else
#endif
  {
    if (*m != 0) PetscCall(PetscFree(*ma)); // free the existing buffer if any
    *ma = (void *)pa;                       // same precision, make them alias
    *m  = 0;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
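/* Usage sketch (mirrors the pattern in MatMumpsHandleSchur_Private() below): make a MumpsScalar
   companion of a freshly allocated PetscScalar buffer without converting values, since MUMPS
   will fill them; <len> is a stand-in name for the buffer length:

     PetscCall(PetscMalloc1(len, &mumps->redrhs));
     PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, len, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
*/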
// Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>.
//
// 1) If the two types are different, cast array elements.
// 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      PetscReal         *a = (PetscReal *)pa;
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;

      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; // view each complex as two reals
    } else {
      PetscReal         *a = (PetscReal *)pa;
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;

      for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i];
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma;

      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    } else {
      const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma;

      for (PetscCount i = 0; i < n; i++) pa[i] = b[i];
    }
  #endif
  } else
#endif
    PetscCall(PetscArraycpy(pa, (const PetscScalar *)ma, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>.
//
// 1) If the two types are different, cast array elements.
// 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op.
static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, void *ma)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  if (mumps_precision != PETSC_SCALAR_PRECISION) {
  #if defined(PETSC_USE_COMPLEX)
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma;

      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    } else {
      ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma;

      for (PetscCount i = 0; i < n; i++) {
        b[i].r = PetscRealPart(pa[i]);
        b[i].i = PetscImaginaryPart(pa[i]);
      }
    }
  #else
    if (mumps_precision == PETSC_PRECISION_SINGLE) {
      SMUMPS_REAL *b = (SMUMPS_REAL *)ma;

      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    } else {
      DMUMPS_REAL *b = (DMUMPS_REAL *)ma;

      for (PetscCount i = 0; i < n; i++) b[i] = pa[i];
    }
  #endif
  } else
#endif
    PetscCall(PetscArraycpy((PetscScalar *)ma, pa, n));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id)
{
  return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT;
}
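/* Round-trip sketch (mirrors MatMumpsSolveSchur_Private() below): pull MUMPS's reduced RHS into
   PetscScalar, operate on it with PETSc, then push the result back in MUMPS's precision; <len>
   is a stand-in for the buffer length:

     PetscCall(MatMumpsCastMumpsScalarArray(len, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));
     ... // work on the PetscScalar buffer mumps->redrhs
     PetscCall(MatMumpsCastPetscScalarArray(len, mumps->redrhs, mumps->id.precision, mumps->id.redrhs));
*/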
#define PreMumpsCall(inner, outer, mumpsscalar) \
  do { \
    inner->job           = outer->job; \
    inner->n             = outer->n; \
    inner->nblk          = outer->nblk; \
    inner->nnz           = outer->nnz; \
    inner->irn           = outer->irn; \
    inner->jcn           = outer->jcn; \
    inner->a             = (mumpsscalar *)outer->a; \
    inner->nnz_loc       = outer->nnz_loc; \
    inner->irn_loc       = outer->irn_loc; \
    inner->jcn_loc       = outer->jcn_loc; \
    inner->a_loc         = (mumpsscalar *)outer->a_loc; \
    inner->blkptr        = outer->blkptr; \
    inner->blkvar        = outer->blkvar; \
    inner->perm_in       = outer->perm_in; \
    inner->rhs           = (mumpsscalar *)outer->rhs; \
    inner->redrhs        = (mumpsscalar *)outer->redrhs; \
    inner->rhs_sparse    = (mumpsscalar *)outer->rhs_sparse; \
    inner->sol_loc       = (mumpsscalar *)outer->sol_loc; \
    inner->rhs_loc       = (mumpsscalar *)outer->rhs_loc; \
    inner->irhs_sparse   = outer->irhs_sparse; \
    inner->irhs_ptr      = outer->irhs_ptr; \
    inner->isol_loc      = outer->isol_loc; \
    inner->irhs_loc      = outer->irhs_loc; \
    inner->nrhs          = outer->nrhs; \
    inner->lrhs          = outer->lrhs; \
    inner->lredrhs       = outer->lredrhs; \
    inner->nz_rhs        = outer->nz_rhs; \
    inner->lsol_loc      = outer->lsol_loc; \
    inner->nloc_rhs      = outer->nloc_rhs; \
    inner->lrhs_loc      = outer->lrhs_loc; \
    inner->schur_lld     = outer->schur_lld; \
    inner->size_schur    = outer->size_schur; \
    inner->listvar_schur = outer->listvar_schur; \
    inner->schur         = (mumpsscalar *)outer->schur; \
  } while (0)

#define PostMumpsCall(inner, outer) \
  do { \
    outer->pivnul_list = inner->pivnul_list; \
    outer->mapping     = inner->mapping; \
  } while (0)

// Entry point for PETSc to call MUMPS
static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer)
{
  PetscFunctionBegin;
#if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION)
  #if defined(PETSC_USE_COMPLEX)
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id;

    PreMumpsCall(inner, outer, CMUMPS_COMPLEX);
    PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id;

    PreMumpsCall(inner, outer, ZMUMPS_COMPLEX);
    PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #else
  if (outer->precision == PETSC_PRECISION_SINGLE) {
    SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id;

    PreMumpsCall(inner, outer, SMUMPS_REAL);
    PetscStackCallExternalVoid("smumps_c", smumps_c(inner));
    PostMumpsCall(inner, outer);
  } else {
    DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id;

    PreMumpsCall(inner, outer, DMUMPS_REAL);
    PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner));
    PostMumpsCall(inner, outer);
  }
  #endif
#else
  MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id;

  PreMumpsCall(inner, outer, MumpsScalar);
  PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner));
  PostMumpsCall(inner, outer);
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* macros s.t. indices match MUMPS documentation */
#define ICNTL(I) icntl[(I) - 1]
#define INFOG(I) infog[(I) - 1]
#define INFO(I)  info[(I) - 1]

// Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I!
#if defined(PETSC_USE_COMPLEX)
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i)
#else
  #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I])
#endif

// Get a value from MumpsReal arrays. The value is convertible to PetscReal.
#define ID_CNTL_GET(ID, I)   ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1])
#define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1])
#define ID_RINFO_GET(ID, I)  ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1])

// Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL>
#define ID_CNTL_SET(ID, I, VAL) \
  do { \
    if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \
    else ((double *)(ID).cntl)[(I) - 1] = (VAL); \
  } while (0)
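/* Example (illustrative; the parameter semantics are MUMPS's, see its users manual): the 1-based
   indices in these macros match the manual, so "set CNTL(3)" and "read RINFOG(1)" become

     ID_CNTL_SET(mumps->id, 3, 1e-12);              // write CNTL(3)
     PetscReal est = ID_RINFOG_GET(mumps->id, 1);   // read RINFOG(1)

   while the integer arrays use the plain ICNTL()/INFOG() macros, e.g., mumps->id.ICNTL(24) = 1. */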
/* if using PETSc's OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore the CPUs the master ranks can run on */
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  #define PetscMUMPS_c(mumps) \
    do { \
      if (mumps->use_petsc_omp_support) { \
        if (mumps->is_omp_master) { \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \
          PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
          PetscCall(PetscCallMumps_Private(&mumps->id)); \
          PetscCall(PetscFPTrapPop()); \
          PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \
        } \
        PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \
        /* Global info is the same on all processes, so we Bcast it within omp_comm. Local info is specific \
           to each process, so we only Bcast info[1], an error code, and leave the others (since they do not have \
           an easy translation between omp_comm and petsc_comm). See the MUMPS-5.1.2 manual p82. \
           omp_comm is a small shared-memory communicator, hence doing multiple Bcasts as shown below is OK. \
        */ \
        SMUMPS_STRUC_C tmp; /* all MUMPS_STRUC_C types have the same lengths for these info arrays */ \
        PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
        PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \
      } else { \
        PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
        PetscCall(PetscCallMumps_Private(&mumps->id)); \
        PetscCall(PetscFPTrapPop()); \
      } \
    } while (0)
#else
  #define PetscMUMPS_c(mumps) \
    do { \
      PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \
      PetscCall(PetscCallMumps_Private(&mumps->id)); \
      PetscCall(PetscFPTrapPop()); \
    } while (0)
#endif
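/* Usage sketch (the pattern used throughout this file, e.g., in MatMumpsHandleSchur_Private()
   below): set the job and any inputs on the outer struct, invoke MUMPS through the wrapper, then
   check the MUMPS status code:

     mumps->id.job = JOB_FACTNUMERIC;
     PetscMUMPS_c(mumps);
     PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
*/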
typedef struct Mat_MUMPS Mat_MUMPS;
struct Mat_MUMPS {
  XMUMPS_STRUC_C id;

  MatStructure   matstruc;
  PetscMPIInt    myid, petsc_size;
  PetscMUMPSInt *irn, *jcn;       /* the (i,j,v) triplets passed to mumps */
  PetscScalar   *val, *val_alloc; /* for some matrices, we can directly access their data array without a buffer; for others we need a buffer, hence val_alloc */
  PetscCount     nnz;             /* number of nonzeros. The type is called selective 64-bit in mumps */
  PetscMUMPSInt  sym;
  MPI_Comm       mumps_comm;
  PetscMUMPSInt *ICNTL_pre;
  PetscReal     *CNTL_pre;
  PetscMUMPSInt  ICNTL9_pre;         /* check if ICNTL(9) is changed from the previous MatSolve */
  VecScatter     scat_rhs, scat_sol; /* used by MatSolve() */
  PetscMUMPSInt  ICNTL20;            /* use centralized (0) or distributed (10) dense RHS */
  PetscMUMPSInt  ICNTL26;
  PetscMUMPSInt  lrhs_loc, nloc_rhs, *irhs_loc;
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  PetscInt    *rhs_nrow, max_nrhs;
  PetscMPIInt *rhs_recvcounts, *rhs_disps;
  PetscScalar *rhs_loc, *rhs_recvbuf;
#endif
  Vec            b_seq, x_seq;
  PetscInt       ninfo, *info; /* which INFO to display */
  PetscInt       sizeredrhs;
  PetscScalar   *schur_sol;
  PetscInt       schur_sizesol;
  PetscScalar   *redrhs;              // buffer in PetscScalar in case MumpsScalar is in a different precision
  PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */
  PetscCount     cur_ilen, cur_jlen;  /* current lengths of ia_alloc[], ja_alloc[] */
  PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);

  /* Support for MATNEST */
  PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *);
  PetscCount  *nest_vals_start;
  PetscScalar *nest_vals;

  /* stuff used by the PETSc/MUMPS OpenMP support */
  PetscBool    use_petsc_omp_support;
  PetscOmpCtrl omp_ctrl;             /* an OpenMP controller with which blocked processes release their CPU (MPI_Barrier does not have this guarantee) */
  MPI_Comm     petsc_comm, omp_comm; /* petsc_comm is the PETSc matrix's comm */
  PetscCount  *recvcount;            /* a collection of nnz on omp_master */
  PetscMPIInt  tag, omp_comm_size;
  PetscBool    is_omp_master; /* is this rank the master of omp_comm? */
  MPI_Request *reqs;
};

/* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt.
   Here, nrow is the number of rows, ia[] is the row pointer and ja[] is the column indices.
*/
static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps)
{
  PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */

  PetscFunctionBegin;
#if defined(PETSC_USE_64BIT_INDICES)
  {
    PetscInt i;
    if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */
      PetscCall(PetscFree(mumps->ia_alloc));
      PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc));
      mumps->cur_ilen = nrow + 1;
    }
    if (nnz > mumps->cur_jlen) {
      PetscCall(PetscFree(mumps->ja_alloc));
      PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc));
      mumps->cur_jlen = nnz;
    }
    for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i]));
    for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i]));
    *ia_mumps = mumps->ia_alloc;
    *ja_mumps = mumps->ja_alloc;
  }
#else
  *ia_mumps = ia;
  *ja_mumps = ja;
#endif
  PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps));
  PetscFunctionReturn(PETSC_SUCCESS);
}
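/* Example (illustrative): in 1-based CSR form, the 2x2 matrix [[4, 1], [0, 3]] has
     ia = {1, 3, 4}   (row i occupies entries ia[i]-1 .. ia[i+1]-2 of ja)
     ja = {1, 2, 2}
   so nnz = ia[nrow] - 1 = 3, as computed above. */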
static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps)
{
  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID()
  PetscCall(PetscFree(mumps->schur_sol));
  mumps->id.size_schur = 0;
  mumps->id.schur_lld  = 0;
  if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes the inner id has not been built yet
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* solve with the rhs in mumps->id.redrhs and return the result in the same location */
static PetscErrorCode MatMumpsSolveSchur_Private(Mat F)
{
  Mat_MUMPS            *mumps = (Mat_MUMPS *)F->data;
  Mat                   S, B, X; // solve S*X = B; all three matrices are dense
  MatFactorSchurStatus  schurstatus;
  PetscInt              sizesol;
  const PetscScalar    *xarray;

  PetscFunctionBegin;
  PetscCall(MatFactorFactorizeSchurComplement(F));
  PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus));
  PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B));
  PetscCall(MatSetType(B, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
  PetscCall(MatBindToCPU(B, S->boundtocpu));
#endif
  switch (schurstatus) {
  case MAT_FACTOR_SCHUR_FACTORED:
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatMatSolveTranspose(S, B, X));
    } else {
      PetscCall(MatMatSolve(S, B, X));
    }
    break;
  case MAT_FACTOR_SCHUR_INVERTED:
    sizesol = mumps->id.nrhs * mumps->id.size_schur;
    if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) {
      PetscCall(PetscFree(mumps->schur_sol));
      PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol));
      mumps->schur_sizesol = sizesol;
    }
    PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X));
    PetscCall(MatSetType(X, ((PetscObject)S)->type_name));
#if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
    PetscCall(MatBindToCPU(X, S->boundtocpu));
#endif
    PetscCall(MatProductCreateWithMat(S, B, NULL, X));
    if (!mumps->id.ICNTL(9)) { /* transpose solve */
      PetscCall(MatProductSetType(X, MATPRODUCT_AtB));
    } else {
      PetscCall(MatProductSetType(X, MATPRODUCT_AB));
    }
    PetscCall(MatProductSetFromOptions(X));
    PetscCall(MatProductSymbolic(X));
    PetscCall(MatProductNumeric(X));

    PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status);
  }
  // We MUST get the array from X (not B), though they share the same host array: we can only guarantee X has the correct data on device.
  PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but going through MatDenseGetArrayRead() is safer with GPUs
  PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs));
  PetscCall(MatDenseRestoreArrayRead(X, &xarray));
  PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus));
  PetscCall(MatDestroy(&B));
  PetscCall(MatDestroy(&X));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (!mumps->id.ICNTL(19)) { /* do nothing when the Schur complement has not been computed */
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  if (!expansion) { /* prepare for the condensation step */
    PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur;
    /* allocate the MUMPS internal array to store reduced right-hand sides */
    if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) {
      mumps->id.lredrhs = mumps->id.size_schur;
      mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs;
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs));
    }
  } else {                                    /* prepare for the expansion step */
    PetscCall(MatMumpsSolveSchur_Private(F)); /* solve the Schur complement, put the solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */
    mumps->id.ICNTL(26) = 2;                  /* expansion phase */
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    /* restore defaults */
    mumps->id.ICNTL(26) = -1;
    /* free the MUMPS internal array for redrhs if we have solved for multiple rhs, in order to save memory */
    if (mumps->id.nrhs > 1) {
      if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs));
      PetscCall(PetscFree(mumps->redrhs));
      mumps->id.redrhs_len = 0;
      mumps->id.lredrhs    = 0;
      mumps->sizeredrhs    = 0;
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatConvertToTriples_A_B - convert a PETSc matrix to triples: row[nz], col[nz], val[nz]

  input:
    A     - matrix in aij, baij or sbaij format
    shift - 0: C style output triple; 1: Fortran style output triple
    reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triples
            MAT_REUSE_MATRIX: only the values in the v array are updated
  output:
    nnz     - dim of r, c, and v (number of local nonzero entries of A)
    r, c, v - row and col indices, matrix values (matrix triples)

  The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() they are
  freed with PetscFree(mumps->irn). This is not ideal: the fact that v is only SOMETIMES part of mumps->irn means
  that the PetscMalloc() cannot easily be replaced with a PetscMalloc3().
*/
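/* Example (illustrative): with shift = 1 (Fortran style), the 2x2 SeqAIJ matrix [[4, 1], [0, 3]]
   yields nnz = 3 and the triples
     r = {1, 1, 2}, c = {1, 2, 2}, v = {4.0, 1.0, 3.0}
   while shift = 0 would give the 0-based r = {0, 0, 1}, c = {0, 1, 1}. */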
static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt    *ai, *aj, *ajj, M = A->rmap->n;
  PetscCount         nz, rnz, k;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = aa->nz;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (PetscCount i = k = 0; i < M; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (PetscCount j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[k]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k]));
        k++;
      }
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                           /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscCount     nz, i, j, k, r;
  Mat_SeqSELL   *a = (Mat_SeqSELL *)A->data;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  nz = a->sliidx[a->totalslices];
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = k = 0; i < a->totalslices; i++) {
      for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++]));
    }
    for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i]));
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = a->val;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */
  else mumps->val = a->val;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}
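/* Note on the slice arithmetic in the SELL loop above (descriptive): nonzeros are stored by
   slices of 8 rows, and consecutive entries within a slice cycle through the 8 rows, so the
   entry at position j of slice i belongs to local row 8*i + r with r = (j - a->sliidx[i]) % 8,
   which is what the running counter r tracks; hence the emitted row index 8 * i + r + shift. */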
static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_SeqBAIJ    *aa = (Mat_SeqBAIJ *)A->data;
  const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2;
  PetscCount      M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m;
  PetscInt        bs;
  PetscMUMPSInt  *row, *col;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(MatGetBlockSize(A, &bs));
    M  = A->rmap->N / bs;
    ai = aa->i;
    aj = aa->j;
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    for (i = 0; i < M; i++) {
      ajj = aj + ai[i];
      rnz = ai[i + 1] - ai[i];
      for (k = 0; k < rnz; k++) {
        for (j = 0; j < bs; j++) {
          for (m = 0; m < bs; m++) {
            PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx]));
            PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx]));
            idx++;
          }
        }
      }
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = nz;
    mumps->val = aa->a;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt *ai, *aj, *ajj;
  PetscInt        bs;
  PetscCount      nz, rnz, i, j, k, m;
  PetscMUMPSInt  *row, *col;
  PetscScalar    *val;
  Mat_SeqSBAIJ   *aa  = (Mat_SeqSBAIJ *)A->data;
  const PetscInt  bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool isset, hermitian;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    const PetscCount alloc_size = aa->nz * bs2;

    PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col));
    if (bs > 1) {
      PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc));
      mumps->val = mumps->val_alloc;
    } else {
      mumps->val = aa->a;
    }
    mumps->irn = row;
    mumps->jcn = col;
  } else {
    row = mumps->irn;
    col = mumps->jcn;
  }
  val = mumps->val;

  nz = 0;
  if (bs > 1) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (ajj[j] > i || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz]));
                PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz]));
              }
              val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs];
            }
          }
        }
      }
    }
  } else if (reuse == MAT_INITIAL_MATRIX) {
    for (i = 0; i < mbs; i++) {
      rnz = ai[i + 1] - ai[i];
      ajj = aj + ai[i];
      for (j = 0; j < rnz; j++) {
        PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
        PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
        nz++;
      }
    }
    PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz);
  } else if (mumps->nest_vals)
    PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX: MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */
  else mumps->val = aa->a;                               /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz;
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *ajj, *adiag, M = A->rmap->n;
  PetscCount         nz, rnz, i, j;
  const PetscScalar *av, *v1;
  PetscScalar       *val;
  PetscMUMPSInt     *row, *col;
  Mat_SeqAIJ        *aa = (Mat_SeqAIJ *)A->data;
  PetscBool          diagDense;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatSeqAIJGetArrayRead(A, &av));
  ai = aa->i;
  aj = aa->j;
  PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense));
  if (reuse == MAT_INITIAL_MATRIX) {
    /* count nz in the upper triangular part of A */
    nz = 0;
    if (!diagDense) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            nz++;
          }
        } else {
          nz += ai[i + 1] - adiag[i];
        }
      }
    } else {
      for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i];
    }
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;

    nz = 0;
    if (!diagDense) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz]));
            val[nz] = av[j];
            nz++;
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          ajj = aj + adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) {
            PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
            PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
            val[nz++] = v1[j];
          }
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        ajj = aj + adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) {
          PetscCall(PetscMUMPSIntCast(i + shift, &row[nz]));
          PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz]));
          val[nz++] = v1[j];
        }
      }
    }
  } else {
    nz  = 0;
    val = mumps->val;
    if (!diagDense) {
      for (i = 0; i < M; i++) {
        if (PetscUnlikely(adiag[i] >= ai[i + 1])) {
          for (j = ai[i]; j < ai[i + 1]; j++) {
            if (aj[j] < i) continue;
            val[nz++] = av[j];
          }
        } else {
          rnz = ai[i + 1] - adiag[i];
          v1  = av + adiag[i];
          for (j = 0; j < rnz; j++) val[nz++] = v1[j];
        }
      }
    } else {
      for (i = 0; i < M; i++) {
        rnz = ai[i + 1] - adiag[i];
        v1  = av + adiag[i];
        for (j = 0; j < rnz; j++) val[nz++] = v1[j];
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(A, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, *ajj, *bjj;
  PetscInt           bs;
  PetscCount         rstart, nz, i, j, k, m, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ *)A->data;
  Mat_SeqSBAIJ      *aa  = (Mat_SeqSBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt     bs2 = aa->bs2, mbs = aa->mbs;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatGetBlockSize(A, &bs));
  rstart = A->rmap->rstart;
  ai     = aa->i;
  aj     = aa->j;
  bi     = bb->i;
  bj     = bb->j;
  av     = aa->a;
  bv     = bb->a;

  garray = mat->garray;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    /* we cannot determine the exact mumps->nnz yet because of the SBAIJ storage */
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i] * bs2;
    v2     = bv + bi[i] * bs2;

    if (bs > 1) {
      /* A-part */
      for (j = 0; j < countA; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (rstart + ajj[j] * bs > irow || k >= m) {
              if (reuse == MAT_INITIAL_MATRIX) {
                PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
                PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj]));
              }
              val[jj++] = v1[j * bs2 + m + k * bs];
            }
          }
        }
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        for (k = 0; k < bs; k++) {
          for (m = 0; m < bs; m++) {
            if (reuse == MAT_INITIAL_MATRIX) {
              PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj]));
              PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj]));
            }
            val[jj++] = v2[j * bs2 + m + k * bs];
          }
        }
      }
    } else {
      /* A-part */
      for (j = 0; j < countA; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
        }
        val[jj++] = v1[j];
      }

      /* B-part */
      for (j = 0; j < countB; j++) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow += bs;
  }
  if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj;
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, cstart, nz, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)Ad->data;
  bb = (Mat_SeqAIJ *)Ao->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;

  rstart = A->rmap->rstart;
  cstart = A->cmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + ai[i]; /* ptr to the beginning of this row */
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v1     = av + ai[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
      }
      val[jj++] = v2[j];
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat_MPIBAIJ       *mat = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ       *aa  = (Mat_SeqBAIJ *)mat->A->data;
  Mat_SeqBAIJ       *bb  = (Mat_SeqBAIJ *)mat->B->data;
  const PetscInt    *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj;
  const PetscInt    *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart;
  const PetscInt     bs2 = mat->bs2;
  PetscInt           bs;
  PetscCount         nz, i, j, k, n, jj, irow, countA, countB, idx;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2;
  PetscScalar       *val;

  PetscFunctionBegin;
  PetscCall(MatGetBlockSize(A, &bs));
  if (reuse == MAT_INITIAL_MATRIX) {
    nz = bs2 * (aa->nz + bb->nz);
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < mbs; i++) {
    countA = ai[i + 1] - ai[i];
    countB = bi[i + 1] - bi[i];
    ajj    = aj + ai[i];
    bjj    = bj + bi[i];
    v1     = av + bs2 * ai[i];
    v2     = bv + bs2 * bi[i];

    idx = 0;
    /* A-part */
    for (k = 0; k < countA; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj]));
          }
          val[jj++] = v1[idx++];
        }
      }
    }

    idx = 0;
    /* B-part */
    for (k = 0; k < countB; k++) {
      for (j = 0; j < bs; j++) {
        for (n = 0; n < bs; n++) {
          if (reuse == MAT_INITIAL_MATRIX) {
            PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj]));
            PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj]));
          }
          val[jj++] = v2[idx++];
        }
      }
    }
    irow += bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscInt    *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj;
  PetscCount         rstart, nz, nza, nzb, i, j, jj, irow, countA, countB;
  PetscMUMPSInt     *row, *col;
  const PetscScalar *av, *bv, *v1, *v2;
  PetscScalar       *val;
  Mat                Ad, Ao;
  Mat_SeqAIJ        *aa;
  Mat_SeqAIJ        *bb;
#if defined(PETSC_USE_COMPLEX)
  PetscBool hermitian, isset;
#endif

  PetscFunctionBegin;
#if defined(PETSC_USE_COMPLEX)
  PetscCall(MatIsHermitianKnown(A, &isset, &hermitian));
  PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Cholesky");
#endif
  PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray));
  PetscCall(MatSeqAIJGetArrayRead(Ad, &av));
  PetscCall(MatSeqAIJGetArrayRead(Ao, &bv));

  aa = (Mat_SeqAIJ *)Ad->data;
  bb = (Mat_SeqAIJ *)Ao->data;
  ai = aa->i;
  aj = aa->j;
  bi = bb->i;
  bj = bb->j;
  PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL));
  rstart = A->rmap->rstart;

  if (reuse == MAT_INITIAL_MATRIX) {
    nza = 0; /* num of upper triangular entries in mat->A, including diagonals */
    nzb = 0; /* num of upper triangular entries in mat->B */
    for (i = 0; i < m; i++) {
      nza += (ai[i + 1] - adiag[i]);
      countB = bi[i + 1] - bi[i];
      bjj    = bj + bi[i];
      for (j = 0; j < countB; j++) {
        if (garray[bjj[j]] > rstart) nzb++;
      }
    }

    nz = nza + nzb; /* total nz of the upper triangular part of mat */
    PetscCall(PetscMalloc2(nz, &row, nz, &col));
    PetscCall(PetscMalloc1(nz, &val));
    mumps->nnz = nz;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->val = mumps->val_alloc = val;
  } else {
    val = mumps->val;
  }

  jj   = 0;
  irow = rstart;
  for (i = 0; i < m; i++) {
    ajj    = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */
    v1     = av + adiag[i];
    countA = ai[i + 1] - adiag[i];
    countB = bi[i + 1] - bi[i];
    bjj    = bj + bi[i];
    v2     = bv + bi[i];

    /* A-part */
    for (j = 0; j < countA; j++) {
      if (reuse == MAT_INITIAL_MATRIX) {
        PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
        PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj]));
      }
      val[jj++] = v1[j];
    }

    /* B-part */
    for (j = 0; j < countB; j++) {
      if (garray[bjj[j]] > rstart) {
        if (reuse == MAT_INITIAL_MATRIX) {
          PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj]));
          PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj]));
        }
        val[jj++] = v2[j];
      }
    }
    irow++;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av));
  PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv));
  PetscFunctionReturn(PETSC_SUCCESS);
}
static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  const PetscScalar *av;
  const PetscInt     M = A->rmap->n;
  PetscCount         i;
  PetscMUMPSInt     *row, *col;
  Vec                v;

  PetscFunctionBegin;
  PetscCall(MatDiagonalGetDiagonal(A, &v));
  PetscCall(VecGetArrayRead(v, &av));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(M, &row, M, &col));
    for (i = 0; i < M; i++) {
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
      col[i] = row[i];
    }
    mumps->val = (PetscScalar *)av;
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = M;
  } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */
  else mumps->val = (PetscScalar *)av;                                      /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */
  PetscCall(VecRestoreArrayRead(v, &av));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscScalar   *v;
  const PetscInt m = A->rmap->n, N = A->cmap->N;
  PetscInt       lda;
  PetscCount     i, j;
  PetscMUMPSInt *row, *col;

  PetscFunctionBegin;
  PetscCall(MatDenseGetArray(A, &v));
  PetscCall(MatDenseGetLDA(A, &lda));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc2(m * N, &row, m * N, &col));
    for (i = 0; i < m; i++) {
      col[i] = 0;
      PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i]));
    }
    for (j = 1; j < N; j++) {
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j));
      PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m));
    }
    if (lda == m) mumps->val = v;
    else {
      PetscCall(PetscMalloc1(m * N, &mumps->val));
      mumps->val_alloc = mumps->val;
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
    mumps->irn = row;
    mumps->jcn = col;
    mumps->nnz = m * N;
  } else {
    if (lda == m && !mumps->nest_vals) mumps->val = v;
    else {
      for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m));
    }
  }
  PetscCall(MatDenseRestoreArray(A, &v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until the parent is not a
// MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself, and returns the appropriate shift and scaling, whether the parent Mat should be
// conjugated, and whether its rows and columns should be swapped
// TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest()
static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap)
{
  Mat         A;
  PetscScalar s[2];
  PetscBool   isTrans, isHTrans, compare;

  PetscFunctionBegin;
  do {
    PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans));
    if (isTrans) {
      PetscCall(MatTransposeGetMat(*sub, &A));
      isHTrans = PETSC_FALSE;
    } else {
      PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans));
      if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A));
    }
    compare = (PetscBool)(isTrans || isHTrans);
    if (compare) {
      if (vshift && vscale) {
        PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
        if (!*conjugate) {
          *vshift += s[0] * *vscale;
          *vscale *= s[1];
        } else {
          *vshift += PetscConj(s[0]) * *vscale;
          *vscale *= PetscConj(s[1]);
        }
      }
      if (swap) *swap = (PetscBool)!*swap;
      if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate;
      *sub = A;
    }
  } while (compare);
  PetscFunctionReturn(PETSC_SUCCESS);
}
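/* Behavior sketch (illustrative): if sub is a MATTRANSPOSEVIRTUAL wrapping A, one pass of the
   loop above returns sub = A with *swap flipped; if sub is a MATHERMITIANTRANSPOSEVIRTUAL
   wrapping a MATTRANSPOSEVIRTUAL of A, two passes return sub = A with *swap flipped twice
   (i.e., restored) and *conjugate flipped once, since (A^T)^H = conj(A). */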
*)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1487 if (!*conjugate) { 1488 *vshift += s[0] * *vscale; 1489 *vscale *= s[1]; 1490 } else { 1491 *vshift += PetscConj(s[0]) * *vscale; 1492 *vscale *= PetscConj(s[1]); 1493 } 1494 } 1495 if (swap) *swap = (PetscBool)!*swap; 1496 if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; 1497 *sub = A; 1498 } 1499 } while (compare); 1500 PetscFunctionReturn(PETSC_SUCCESS); 1501 } 1502 1503 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1504 { 1505 Mat **mats; 1506 PetscInt nr, nc; 1507 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1508 1509 PetscFunctionBegin; 1510 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1511 if (reuse == MAT_INITIAL_MATRIX) { 1512 PetscMUMPSInt *irns, *jcns; 1513 PetscScalar *vals; 1514 PetscCount totnnz, cumnnz, maxnnz; 1515 PetscInt *pjcns_w, Mbs = 0; 1516 IS *rows, *cols; 1517 PetscInt **rows_idx, **cols_idx; 1518 1519 cumnnz = 0; 1520 maxnnz = 0; 1521 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1522 for (PetscInt r = 0; r < nr; r++) { 1523 for (PetscInt c = 0; c < nc; c++) { 1524 Mat sub = mats[r][c]; 1525 1526 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1527 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1528 if (sub) { 1529 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1530 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 1531 MatInfo info; 1532 1533 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 1534 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1535 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1536 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1537 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1538 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1539 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1540 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1541 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1542 1543 if (chol) { 1544 if (r == c) { 1545 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1546 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1547 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1548 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1549 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1550 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1551 } else { 1552 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1553 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1554 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1555 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1556 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1557 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1558 } 1559 } else { 1560 if 
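
/* Worked example (illustration only): if *sub is a MATHERMITIANTRANSPOSEVIRTUAL wrapping B with a
   MatShell scaling c and no shift, i.e. sub = c * B^H, one pass of the loop above yields

     *sub = B, *vscale multiplied by c (conjugated if *conjugate was already set),
     *swap toggled, *conjugate toggled,

   so a caller reads the entries of B with row and column roles exchanged and applies
   vscale * PetscConj(.) to each value, recovering the entries of sub. Any nonzero additive shift
   accumulates into *vshift and is rejected later by the callers in MatConvertToTriples_nest_xaij(). */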

static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps)
{
  Mat     **mats;
  PetscInt  nr, nc;
  PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats));
  if (reuse == MAT_INITIAL_MATRIX) {
    PetscMUMPSInt *irns, *jcns;
    PetscScalar   *vals;
    PetscCount     totnnz, cumnnz, maxnnz;
    PetscInt      *pjcns_w, Mbs = 0;
    IS            *rows, *cols;
    PetscInt     **rows_idx, **cols_idx;

    cumnnz = 0;
    maxnnz = 0;
    PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples));
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat sub = mats[r][c];

        mumps->nest_convert_to_triples[r * nc + c] = NULL;
        if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */
        if (sub) {
          PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL;
          PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense;
          MatInfo   info;

          PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ));
          PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ));
          PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag));
          PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL));

          if (chol) {
            if (r == c) {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij;
              else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij;
              else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            } else {
              if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
              else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
              else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
              else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
              else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
              else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
            }
          } else {
            if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij;
            else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij;
            else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij;
            else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij;
            else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij;
            else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij;
          }
          PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name);
          mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples;
          PetscCall(MatGetInfo(sub, MAT_LOCAL, &info));
          cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */
          maxnnz = PetscMax(maxnnz, info.nz_used);
        }
      }
    }

    /* Allocate total COO */
    totnnz = cumnnz;
    PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns));
    PetscCall(PetscMalloc1(totnnz, &vals));

    /* Handle rows and column maps
       We directly map rows and use an SF for the columns */
    PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx));
    PetscCall(MatNestGetISs(A, rows, cols));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w));
    else (void)maxnnz;

    cumnnz = 0;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        Mat             sub  = mats[r][c];
        const PetscInt *ridx = rows_idx[r];
        const PetscInt *cidx = cols_idx[c];
        PetscScalar     vscale = 1.0, vshift = 0.0;
        PetscInt        rst, size, bs;
        PetscSF         csf;
        PetscBool       conjugate = PETSC_FALSE, swap = PETSC_FALSE;
        PetscLayout     cmap;
        PetscInt        innz;

        mumps->nest_vals_start[r * nc + c] = cumnnz;
        if (c == r) {
          PetscCall(ISGetSize(rows[r], &size));
          if (!mumps->nest_convert_to_triples[r * nc + c]) {
            for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
          }
          PetscCall(MatGetBlockSize(sub, &bs));
          Mbs += size / bs;
        }
        if (!mumps->nest_convert_to_triples[r * nc + c]) continue;

        /* Extract inner blocks if needed */
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");

        /* Get column layout to map off-process columns */
        PetscCall(MatGetLayouts(sub, NULL, &cmap));

        /* Get row start to map on-process rows */
        PetscCall(MatGetOwnershipRange(sub, &rst, NULL));

        /* Directly use the mumps datastructure and use C ordering for now */
        PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps));

        /* Swap the role of rows and columns indices for transposed blocks
           since we need values with global final ordering */
        if (swap) {
          cidx = rows_idx[r];
          ridx = cols_idx[c];
        }

        /* Communicate column indices
           This could have been done with a single SF but it would have complicated the code a lot.
           But since we do it only once, we pay the price of setting up an SF for each block */
        if (PetscDefined(USE_64BIT_INDICES)) {
          for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k];
        } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */
        PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf));
        PetscCall(PetscIntCast(mumps->nnz, &innz));
        PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w));
        PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE));
        PetscCall(PetscSFDestroy(&csf));

        /* Import indices: use direct map for rows and mapped indices for columns */
        if (swap) {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k]));
          }
        } else {
          for (PetscInt k = 0; k < mumps->nnz; k++) {
            PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k]));
            PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k]));
          }
        }

        /* Import values to full COO */
        if (conjugate) { /* conjugate the entries */
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscScalar *v = vals + cumnnz;
          for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k];
        } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz));

        /* Shift new starting point and sanity check */
        cumnnz += mumps->nnz;
        PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);

        /* Free scratch memory */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->val = NULL;
        mumps->nnz = 0;
      }
    }
    if (mumps->id.ICNTL(15) == 1) {
      if (Mbs != A->rmap->N) {
        PetscMPIInt rank, size;

        PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
        PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
        if (rank == 0) {
          PetscInt shift = 0;

          PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk));
          PetscCall(PetscFree(mumps->id.blkptr));
          PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr));
          mumps->id.blkptr[0] = 1;
          for (PetscInt i = 0; i < size; ++i) {
            for (PetscInt r = 0; r < nr; r++) {
              Mat             sub = mats[r][r];
              const PetscInt *ranges;
              PetscInt        bs;

              for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row
              PetscCall(MatGetOwnershipRanges(sub, &ranges));
              PetscCall(MatGetBlockSize(sub, &bs));
              for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1));
              shift += (ranges[i + 1] - ranges[i]) / bs;
            }
          }
        }
      } else mumps->id.ICNTL(15) = 0;
    }
    if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w));
    for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r]));
    for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c]));
    PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx));
    if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz);
    mumps->nest_vals_start[nr * nc] = cumnnz;

    /* Set pointers for final MUMPS data structure */
    mumps->nest_vals = vals;
    mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */
    mumps->val       = vals;
    mumps->irn       = irns;
    mumps->jcn       = jcns;
    mumps->nnz       = cumnnz;
  } else {
    PetscScalar *oval = mumps->nest_vals;
    for (PetscInt r = 0; r < nr; r++) {
      for (PetscInt c = 0; c < nc; c++) {
        PetscBool   conjugate = PETSC_FALSE;
        Mat         sub       = mats[r][c];
        PetscScalar vscale = 1.0, vshift = 0.0;
        PetscInt    midx = r * nc + c;

        if (!mumps->nest_convert_to_triples[midx]) continue;
        PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL));
        PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell");
        mumps->val = oval + mumps->nest_vals_start[midx];
        PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps));
        if (conjugate) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]);
        } else if (vscale != 1.0) {
          PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx];
          for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale;
        }
      }
    }
    mumps->val = oval;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
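
/* Usage sketch (hypothetical driver code, not part of this file): MatConvertToTriples_nest_xaij()
   lets MUMPS factor a MATNEST without first converting it to AIJ, including virtually transposed
   off-diagonal blocks:

     Mat           block[4], K, F;
     MatFactorInfo info;
     // block[0] (A00) and block[3] (A11) assembled as MATAIJ, block[1] = B
     PetscCall(MatCreateTranspose(block[1], &block[2]));      // A10 = B^T, kept virtual
     PetscCall(MatCreateNest(comm, 2, NULL, 2, NULL, block, &K));
     PetscCall(MatGetFactor(K, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatLUFactorSymbolic(F, K, NULL, NULL, &info)); // triggers the conversion above
     PetscCall(MatLUFactorNumeric(F, K, &info));

   For a Cholesky factorization (mumps->sym != 0), the blocks below the diagonal are skipped and
   only the upper triangular part is packed into the COO arrays. */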

static PetscErrorCode MatDestroy_MUMPS(Mat A)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)A->data;

  PetscFunctionBegin;
  PetscCall(PetscFree(mumps->id.isol_loc));
  PetscCall(VecScatterDestroy(&mumps->scat_rhs));
  PetscCall(VecScatterDestroy(&mumps->scat_sol));
  PetscCall(VecDestroy(&mumps->b_seq));
  PetscCall(VecDestroy(&mumps->x_seq));
  PetscCall(PetscFree(mumps->id.perm_in));
  PetscCall(PetscFree(mumps->id.blkvar));
  PetscCall(PetscFree(mumps->id.blkptr));
  PetscCall(PetscFree2(mumps->irn, mumps->jcn));
  PetscCall(PetscFree(mumps->val_alloc));
  PetscCall(PetscFree(mumps->info));
  PetscCall(PetscFree(mumps->ICNTL_pre));
  PetscCall(PetscFree(mumps->CNTL_pre));
  PetscCall(MatMumpsResetSchur_Private(mumps));
  if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */
    mumps->id.job = JOB_END;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));
    if (mumps->mumps_comm != MPI_COMM_NULL) {
      if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm));
      else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm));
    }
  }
  PetscCall(MatMumpsFreeInternalID(&mumps->id));
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
  if (mumps->use_petsc_omp_support) {
    PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl));
    PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
    PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps));
  }
#endif
  PetscCall(PetscFree(mumps->ia_alloc));
  PetscCall(PetscFree(mumps->ja_alloc));
  PetscCall(PetscFree(mumps->recvcount));
  PetscCall(PetscFree(mumps->reqs));
  PetscCall(PetscFree(mumps->irhs_loc));
  PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples));
  PetscCall(PetscFree(mumps->nest_vals));
  PetscCall(PetscFree(A->data));

  /* clear composed functions */
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */
static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array)
{
  Mat_MUMPS        *mumps   = (Mat_MUMPS *)A->data;
  const PetscMPIInt ompsize = mumps->omp_comm_size;
  PetscInt          i, m, M, rstart;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &M, NULL));
  PetscCall(MatGetLocalSize(A, &m, NULL));
  PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt");
  if (ompsize == 1) {
    if (!mumps->irhs_loc) {
      mumps->nloc_rhs = (PetscMUMPSInt)m;
      PetscCall(PetscMalloc1(m, &mumps->irhs_loc));
      PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
      for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
  } else {
#if defined(PETSC_HAVE_OPENMP_SUPPORT)
    const PetscInt *ranges;
    PetscMPIInt     j, k, sendcount, *petsc_ranks, *omp_ranks;
    MPI_Group       petsc_group, omp_group;
    PetscScalar    *recvbuf = NULL;

    if (mumps->is_omp_master) {
      /* Lazily initialize the omp stuff for distributed rhs */
      if (!mumps->irhs_loc) {
        PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks));
        PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps));
        PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group));
        PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group));
        for (j = 0; j < ompsize; j++) omp_ranks[j] = j;
        PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks));

        /* Populate mumps->irhs_loc[], rhs_nrow[] */
        mumps->nloc_rhs = 0;
        PetscCall(MatGetOwnershipRanges(A, &ranges));
        for (j = 0; j < ompsize; j++) {
          mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]];
          mumps->nloc_rhs += mumps->rhs_nrow[j];
        }
        PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc));
        for (j = k = 0; j < ompsize; j++) {
          for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */
        }

        PetscCall(PetscFree2(omp_ranks, petsc_ranks));
        PetscCallMPI(MPI_Group_free(&petsc_group));
        PetscCallMPI(MPI_Group_free(&omp_group));
      }

      /* Realloc buffers when current nrhs is bigger than what we have met */
      if (nrhs > mumps->max_nrhs) {
        PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf));
        PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf));
        mumps->max_nrhs = nrhs;
      }

      /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */
      for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j]));
      mumps->rhs_disps[0] = 0;
      for (j = 1; j < ompsize; j++) {
        mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1];
        PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!");
      }
      recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */
    }

    PetscCall(PetscMPIIntCast(m * nrhs, &sendcount));
    PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm));

    if (mumps->is_omp_master) {
      if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */
        PetscScalar *dst, *dstbase = mumps->rhs_loc;
        for (j = 0; j < ompsize; j++) {
          const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j];
          dst = dstbase;
          for (i = 0; i < nrhs; i++) {
            PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j]));
            src += mumps->rhs_nrow[j];
            dst += mumps->nloc_rhs;
          }
          dstbase += mumps->rhs_nrow[j];
        }
      }
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc));
    }
#endif /* PETSC_HAVE_OPENMP_SUPPORT */
  }
  mumps->id.nrhs     = (PetscMUMPSInt)nrhs;
  mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs;
  mumps->id.lrhs_loc = mumps->nloc_rhs;
  mumps->id.irhs_loc = mumps->irhs_loc;
  PetscFunctionReturn(PETSC_SUCCESS);
}
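
/* Layout illustration (a sketch; the numbers are made up): with ICNTL(20)=10 MUMPS takes the RHS
   distributed as

     irhs_loc[] - 1-based global row indices owned locally, e.g. {3, 4} on the rank owning rows 2-3
     rhs_loc[]  - the corresponding entries, stored column-major with leading dimension lrhs_loc,
                  so entry (i, j) of the local RHS block lives at rhs_loc[i + j * lrhs_loc]

   which is exactly what the routine above assembles before handing the pointers to id.irhs_loc
   and id.rhs_loc. */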

static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS         *mumps  = (Mat_MUMPS *)A->data;
  const PetscScalar *barray = NULL;
  PetscScalar       *array;
  IS                 is_iden, is_petsc;
  PetscInt           i;
  PetscBool          second_solve = PETSC_FALSE;
  static PetscBool   cite1 = PETSC_FALSE, cite2 = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscCitationsRegister("@article{MUMPS01,\n  author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n  title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n  journal = {SIAM "
                                   "Journal on Matrix Analysis and Applications},\n  volume = {23},\n  number = {1},\n  pages = {15--41},\n  year = {2001}\n}\n",
                                   &cite1));
  PetscCall(PetscCitationsRegister("@article{MUMPS02,\n  author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n  title = {Hybrid scheduling for the parallel solution of linear systems},\n  journal = {Parallel "
                                   "Computing},\n  volume = {32},\n  number = {2},\n  pages = {136--156},\n  year = {2006}\n}\n",
                                   &cite2));

  PetscCall(VecFlag(x, A->factorerrortype));
  if (A->factorerrortype) {
    PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  mumps->id.nrhs = 1;
  if (mumps->petsc_size > 1) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */
      PetscCall(VecGetArrayRead(b, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray));
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS; scatter b into a sequential b_seq vector */
      PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD));
      if (!mumps->myid) {
        PetscCall(VecGetArray(mumps->b_seq, &array));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
      }
    }
  } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */
    mumps->id.ICNTL(20) = 0;
    PetscCall(VecCopy(b, x));
    PetscCall(VecGetArray(x, &array));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
  }

  /*
     handle condensation step of Schur complement (if any)
     We set by default ICNTL(26) == -1 when Schur indices have been provided by the user.
     According to the MUMPS (5.0.0) manual, any value should be harmless during the factorization phase.
     Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system.
     This requires an extra call to PetscMUMPS_c and the computation of the factors for S
  */
  if (mumps->id.size_schur > 0) {
    PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");
    if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
      second_solve = PETSC_TRUE;
      PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
      mumps->id.ICNTL(26) = 1;                                /* condensation phase */
    } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
  }

  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

  /* handle expansion step of Schur complement (if any) */
  if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
  else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
    PetscCall(MatMumpsSolveSchur_Private(A));
    for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i);
  }

  if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */
    if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) {
      /* when id.ICNTL(9) changes, the contents of isol_loc may change (not its size, lsol_loc), recreate scat_sol */
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
    }
    if (!mumps->scat_sol) { /* create scatter scat_sol */
      PetscInt *isol2_loc = NULL;

      PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */
      PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc));
      for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1;                        /* change Fortran style to C style */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */
      PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol));
      PetscCall(ISDestroy(&is_iden));
      PetscCall(ISDestroy(&is_petsc));
      mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */
    }

    PetscScalar *xarray;
    PetscCall(VecGetArray(mumps->x_seq, &xarray));
    PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray));
    PetscCall(VecRestoreArray(mumps->x_seq, &xarray));
    PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD));

    if (mumps->ICNTL20 == 10) { // distributed RHS
      PetscCall(VecRestoreArrayRead(b, &barray));
    } else if (!mumps->myid) { // centralized RHS
      PetscCall(VecRestoreArray(mumps->b_seq, &array));
    }
  } else {
    // id.rhs has the solution in mumps precision
    PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(VecRestoreArray(x, &array));
  }

  PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
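
/* Usage sketch of the Schur complement path handled above (hypothetical caller code; error checks
   and MatFactorInfo handling omitted for brevity):

     IS  is;   // global indices of the Schur variables
     Mat F, S;
     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatFactorSetSchurIS(F, is));   // internally sets ICNTL(19) and ICNTL(26) = -1
     // ... symbolic and numeric factorization of F ...
     PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
     // MatSolve(F, b, x) now performs the condensation/expansion steps seen above

   With the default ICNTL(26) = -1, MatSolve() above does the two-phase (condense + expand) solve
   so that the full system is still solved. */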

static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatSolve_MUMPS(A, b, x));
  mumps->id.ICNTL(9) = value;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X)
{
  Mat                Bt = NULL;
  PetscBool          denseX, denseB, flg, flgT;
  Mat_MUMPS         *mumps = (Mat_MUMPS *)A->data;
  PetscInt           i, nrhs, M, nrhsM;
  PetscScalar       *array;
  const PetscScalar *barray;
  PetscInt           lsol_loc, nlsol_loc, *idxx, iidx = 0;
  PetscMUMPSInt     *isol_loc, *isol_loc_save;
  PetscScalar       *sol_loc;
  void              *sol_loc_save;
  PetscCount         sol_loc_len_save;
  IS                 is_to, is_from;
  PetscInt           k, proc, j, m, myrstart;
  const PetscInt    *rstart;
  Vec                v_mpi, msol_loc;
  VecScatter         scat_sol;
  Vec                b_seq;
  VecScatter         scat_rhs;
  PetscScalar       *aa;
  PetscInt           spnr, *ia, *ja;
  Mat_MPIAIJ        *b = NULL;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL));
  PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix");

  PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL));

  if (denseB) {
    PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution");
    mumps->id.ICNTL(20) = 0; /* dense RHS */
  } else { /* sparse B */
    PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices");
    PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT));
    PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix");
    PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
    /* input B is transpose of actual RHS matrix,
       because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */
    PetscCall(MatTransposeGetMat(B, &Bt));
    mumps->id.ICNTL(20) = 1; /* sparse RHS */
  }

  PetscCall(MatGetSize(B, &M, &nrhs));
  PetscCall(PetscIntMultError(nrhs, M, &nrhsM));
  mumps->id.nrhs = (PetscMUMPSInt)nrhs;
  mumps->id.lrhs = (PetscMUMPSInt)M;

  if (mumps->petsc_size == 1) { // handle this easy case specially and return early
    PetscScalar *aa;
    PetscInt     spnr, *ia, *ja;
    PetscBool    second_solve = PETSC_FALSE;

    PetscCall(MatDenseGetArray(X, &array));
    if (denseB) {
      /* copy B to X */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(PetscArraycpy(array, barray, nrhsM));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
    } else { /* sparse B */
      PetscCall(MatSeqAIJGetArray(Bt, &aa));
      PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    }
    PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));

    /* handle condensation step of Schur complement (if any) */
    if (mumps->id.size_schur > 0) {
      if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) {
        second_solve = PETSC_TRUE;
        PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs
        mumps->id.ICNTL(26) = 1;                                /* condensation phase, i.e., to solve id.redrhs */
      } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE));
    }

    mumps->id.job = JOB_SOLVE;
    PetscMUMPS_c(mumps);
    PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));

    /* handle expansion step of Schur complement (if any) */
    if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE));
    else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side
      PetscCall(MatMumpsSolveSchur_Private(A));
      for (j = 0; j < nrhs; ++j)
        for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs);
    }

    if (!denseB) { /* sparse B, restore ia, ja */
      PetscCall(MatSeqAIJRestoreArray(Bt, &aa));
      PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }

    // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X.
    PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array));
    PetscCall(MatDenseRestoreArray(X, &array));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* parallel case: MUMPS requires rhs B to be centralized on the host! */
  PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc");

  /* create msol_loc to hold mumps local solution */
  isol_loc_save    = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */
  sol_loc_save     = mumps->id.sol_loc;
  sol_loc_len_save = mumps->id.sol_loc_len;
  mumps->id.isol_loc    = NULL; // an init state
  mumps->id.sol_loc     = NULL;
  mumps->id.sol_loc_len = 0;

  lsol_loc = mumps->id.lsol_loc;
  PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */
  PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc));
  PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
  mumps->id.isol_loc = isol_loc;

  PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc));

  if (denseB) {
    if (mumps->ICNTL20 == 10) {
      mumps->id.ICNTL(20) = 10; /* dense distributed RHS */
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc
      PetscCall(MatDenseRestoreArrayRead(B, &barray));
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X
    } else {
      mumps->id.ICNTL(20) = 0; /* dense centralized RHS */
      /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in
         very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank
         0, re-arrange B into desired order, which is a local operation.
       */

      /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */
      /* wrap dense rhs matrix B into a vector v_mpi */
      PetscCall(MatGetLocalSize(B, &m, NULL));
      PetscCall(MatDenseGetArrayRead(B, &barray));
      PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi));
      PetscCall(MatDenseRestoreArrayRead(B, &barray));

      /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */
      if (!mumps->myid) {
        PetscInt *idx;

        /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */
        PetscCall(PetscMalloc1(nrhsM, &idx));
        PetscCall(MatGetOwnershipRanges(B, &rstart));
        for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) {
          for (j = 0; j < nrhs; j++) {
            for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i;
          }
        }

        PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq));
        PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from));
      } else {
        PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to));
        PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from));
      }

      PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs));
      PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(ISDestroy(&is_to));
      PetscCall(ISDestroy(&is_from));
      PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD));

      if (!mumps->myid) { /* define rhs on the host */
        PetscCall(VecGetArrayRead(b_seq, &barray));
        PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs));
        PetscCall(VecRestoreArrayRead(b_seq, &barray));
      }
    }
  } else { /* sparse B */
    b = (Mat_MPIAIJ *)Bt->data;

    /* wrap dense X into a vector v_mpi */
    PetscCall(MatGetLocalSize(X, &m, NULL));
    PetscCall(MatDenseGetArrayRead(X, &barray));
    PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi));
    PetscCall(MatDenseRestoreArrayRead(X, &barray));

    if (!mumps->myid) {
      PetscCall(MatSeqAIJGetArray(b->A, &aa));
      PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure");
      PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs));
      PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse));
    } else {
      mumps->id.irhs_ptr    = NULL;
      mumps->id.irhs_sparse = NULL;
      mumps->id.nz_rhs      = 0;
      if (mumps->id.rhs_sparse_len) {
        PetscCall(PetscFree(mumps->id.rhs_sparse));
        mumps->id.rhs_sparse_len = 0;
      }
    }
  }

  /* solve phase */
  mumps->id.job = JOB_SOLVE;
  PetscMUMPS_c(mumps);
  PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1));

  /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */
  PetscCall(MatDenseGetArray(X, &array));
  PetscCall(VecPlaceArray(v_mpi, array));

  /* create scatter scat_sol */
  PetscCall(MatGetOwnershipRanges(X, &rstart));
  /* iidx: index for scatter mumps solution to PETSc X */
  PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from));
  PetscCall(PetscMalloc1(nlsol_loc, &idxx));
  for (i = 0; i < lsol_loc; i++) {
    isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */

    for (proc = 0; proc < mumps->petsc_size; proc++) {
      if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) {
        myrstart = rstart[proc];
        k        = isol_loc[i] - myrstart;          /* local index on 1st column of PETSc vector X */
        iidx     = k + myrstart * nrhs;             /* maps mumps isol_loc[i] to PETSc index in X */
        m        = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */
        break;
      }
    }

    for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to));
  PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[]
  PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol));
  PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(ISDestroy(&is_from));
  PetscCall(ISDestroy(&is_to));
  PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(MatDenseRestoreArray(X, &array));

  if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers
    mumps->id.sol_loc_len = 0;
    PetscCall(PetscFree(mumps->id.sol_loc));
  }

  // restore old values
  mumps->id.sol_loc     = sol_loc_save;
  mumps->id.sol_loc_len = sol_loc_len_save;
  mumps->id.isol_loc    = isol_loc_save;

  PetscCall(PetscFree2(sol_loc, isol_loc));
  PetscCall(PetscFree(idxx));
  PetscCall(VecDestroy(&msol_loc));
  PetscCall(VecDestroy(&v_mpi));
  if (!denseB) {
    if (!mumps->myid) {
      b = (Mat_MPIAIJ *)Bt->data;
      PetscCall(MatSeqAIJRestoreArray(b->A, &aa));
      PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg));
      PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure");
    }
  } else {
    if (mumps->ICNTL20 == 0) {
      PetscCall(VecDestroy(&b_seq));
      PetscCall(VecScatterDestroy(&scat_rhs));
    }
  }
  PetscCall(VecScatterDestroy(&scat_sol));
  PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
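
/* Usage sketch (hypothetical caller code): for a sparse right-hand side, callers do not pass an
   AIJ matrix directly; they wrap its transpose so that MUMPS receives compressed-column storage:

     Mat Bt, X;   // Bt holds the right-hand sides as *rows*, X is MATDENSE
     PetscCall(MatMatTransposeSolve(F, Bt, X));   // dispatches to MatMatTransposeSolve_MUMPS()

   which internally does MatCreateTranspose(Bt, &B) and calls MatMatSolve_MUMPS() with
   mumps->id.ICNTL(20) = 1, as seen below. */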

static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X)
{
  Mat_MUMPS          *mumps = (Mat_MUMPS *)A->data;
  const PetscMUMPSInt value = mumps->id.ICNTL(9);

  PetscFunctionBegin;
  mumps->id.ICNTL(9) = 0;
  PetscCall(MatMatSolve_MUMPS(A, B, X));
  mumps->id.ICNTL(9) = value;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X)
{
  PetscBool flg;
  Mat       B;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL));
  PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix");

  /* Create B=Bt^T that uses Bt's data structure */
  PetscCall(MatCreateTranspose(Bt, &B));

  PetscCall(MatMatSolve_MUMPS(A, B, X));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#if !defined(PETSC_USE_COMPLEX)
/*
  input:
   F:     numeric factor
  output:
   nneg:  total number of negative pivots
   nzero: total number of zero pivots
   npos:  (global dimension of F) - nneg - nzero
*/
static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos)
{
  Mat_MUMPS  *mumps = (Mat_MUMPS *)F->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size));
  /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */
  PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.ICNTL(13));

  if (nneg) *nneg = mumps->id.INFOG(12);
  if (nzero || npos) {
    PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection");
    if (nzero) *nzero = mumps->id.INFOG(28);
    if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
#endif
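
/* Usage sketch (hypothetical options and caller code): the inertia is only available for a real
   symmetric Cholesky factorization, and the checks above require, in parallel,

     -mat_mumps_icntl_13 1   (disable ScaLAPACK on the root node)
     -mat_mumps_icntl_24 1   (enable null pivot detection, needed for nzero/npos)

   after which one may call

     PetscInt nneg, nzero, npos;
     PetscCall(MatGetInertia(F, &nneg, &nzero, &npos));

   e.g., to count eigenvalues below a shift sigma after factoring A - sigma*I, as done by
   spectrum-slicing eigensolvers. */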

static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps)
{
  PetscMPIInt    nreqs;
  PetscMUMPSInt *irn, *jcn;
  PetscMPIInt    count;
  PetscCount     totnnz, remain;
  const PetscInt osize = mumps->omp_comm_size;
  PetscScalar   *val;

  PetscFunctionBegin;
  if (osize > 1) {
    if (reuse == MAT_INITIAL_MATRIX) {
      /* master first gathers counts of nonzeros to receive */
      if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount));
      PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm));

      /* Then each computes number of send/recvs */
      if (mumps->is_omp_master) {
        /* Start from 1 since self communication is not done in MPI */
        nreqs = 0;
        for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX;
      } else {
        nreqs = (PetscMPIInt)((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX);
      }
      PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */

      /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others.
         MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz
         might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size
         is very small, the current approach should have no extra overhead compared to MPI_Gatherv.
       */
      nreqs = 0; /* counter for actual send/recvs */
      if (mumps->is_omp_master) {
        totnnz = 0;

        for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */
        PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn));
        PetscCall(PetscMalloc1(totnnz, &val));

        /* Self communication */
        PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz));
        PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz));
        PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz));

        /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */
        PetscCall(PetscFree2(mumps->irn, mumps->jcn));
        PetscCall(PetscFree(mumps->val_alloc));
        mumps->nnz = totnnz;
        mumps->irn = irn;
        mumps->jcn = jcn;
        mumps->val = mumps->val_alloc = val;

        irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */
        jcn += mumps->recvcount[0];
        val += mumps->recvcount[0];

        /* Remote communication */
        for (PetscMPIInt i = 1; i < osize; i++) {
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            irn += count;
            jcn += count;
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        irn    = mumps->irn;
        jcn    = mumps->jcn;
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          irn += count;
          jcn += count;
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    } else {
      nreqs = 0;
      if (mumps->is_omp_master) {
        val = mumps->val + mumps->recvcount[0];
        for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */
          count  = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain = mumps->recvcount[i] - count;
          while (count > 0) {
            PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
            val += count;
            count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
            remain -= count;
          }
        }
      } else {
        val    = mumps->val;
        count  = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX);
        remain = mumps->nnz - count;
        while (count > 0) {
          PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++]));
          val += count;
          count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX);
          remain -= count;
        }
      }
    }
    PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE));
    mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFO(2) == 0)) {
    if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps));
  PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps));

  /* numerical factorization phase */
  mumps->id.job = JOB_FACTNUMERIC;
  if (!mumps->id.ICNTL(18)) { /* A is centralized */
    if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a));
  } else {
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc));
  }

  if (F->schur) {
    const PetscScalar *array;
    MUMPS_INT          size = mumps->id.size_schur;

    PetscCall(MatDenseGetArrayRead(F->schur, &array));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur));
    PetscCall(MatDenseRestoreArrayRead(F->schur, &array));
  }

  PetscMUMPS_c(mumps);
  if (mumps->id.INFOG(1) < 0) {
    PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2));
    if (mumps->id.INFOG(1) == -10) {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
    } else if (mumps->id.INFOG(1) == -13) {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OUTMEMORY;
    } else {
      PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2)));
      F->factorerrortype = MAT_FACTOR_OTHER;
    }
  }
  PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16));

  F->assembled = PETSC_TRUE;

  if (F->schur) { /* reset Schur status to unfactored */
#if defined(PETSC_HAVE_CUDA)
    F->schur->offloadmask = PETSC_OFFLOAD_CPU;
#endif
    PetscScalar *array;
    PetscCall(MatDenseGetArray(F->schur, &array));
    PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array));
    PetscCall(MatDenseRestoreArray(F->schur, &array));
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      mumps->id.ICNTL(19) = 2;
      PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur));
    }
    PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED));
  }

  /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */
  if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3;

  if (!mumps->is_omp_master) mumps->id.INFO(23) = 0;
  // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the
  // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for
  // which the solution (in SOL_loc) is available on the local processor.
  // If successive calls to the solve phase (JOB=3) are performed for a given matrix, ISOL_loc will
  // normally have the same contents for each of these calls. The only exception is the case of
  // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non
  // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase.
  if (mumps->petsc_size > 1) {
    PetscInt     lsol_loc;
    PetscScalar *array;

    /* distributed solution; Create x_seq=sol_loc for repeated use */
    if (mumps->x_seq) {
      PetscCall(VecScatterDestroy(&mumps->scat_sol));
      PetscCall(PetscFree(mumps->id.isol_loc));
      PetscCall(VecDestroy(&mumps->x_seq));
    }
    lsol_loc = mumps->id.INFO(23); /* length of sol_loc */
    PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc));
    PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq));
    PetscCall(VecGetArray(mumps->x_seq, &array));
    PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc));
    PetscCall(VecRestoreArray(mumps->x_seq, &array));
    mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc;
  }
  PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
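
/* Usage sketch (hypothetical caller code): after the numeric factorization above, callers can
   inspect the outcome without triggering an error, e.g.

     MatFactorError err;
     PetscCall(MatFactorGetError(F, &err));        // MAT_FACTOR_NUMERIC_ZEROPIVOT, ...
     if (err == MAT_FACTOR_NOERROR) {
       PetscInt infog1;
       PetscCall(MatMumpsGetInfog(F, 1, &infog1)); // raw MUMPS INFOG(1)
     }

   mirroring the F->factorerrortype values assigned above from INFOG(1). */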
PETSC_OFFLOAD_CPU; 2542 #endif 2543 PetscScalar *array; 2544 PetscCall(MatDenseGetArray(F->schur, &array)); 2545 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array)); 2546 PetscCall(MatDenseRestoreArray(F->schur, &array)); 2547 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2548 mumps->id.ICNTL(19) = 2; 2549 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2550 } 2551 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2552 } 2553 2554 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2555 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2556 2557 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2558 // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the 2559 // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for 2560 // which the solution (in SOL_loc) is available on the local processor. 2561 // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will 2562 // normally have the same contents for each of these calls. The only exception is the case of 2563 // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non 2564 // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase. 2565 if (mumps->petsc_size > 1) { 2566 PetscInt lsol_loc; 2567 PetscScalar *array; 2568 2569 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2570 if (mumps->x_seq) { 2571 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2572 PetscCall(PetscFree(mumps->id.isol_loc)); 2573 PetscCall(VecDestroy(&mumps->x_seq)); 2574 } 2575 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2576 PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc)); 2577 PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq)); 2578 PetscCall(VecGetArray(mumps->x_seq, &array)); 2579 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2580 PetscCall(VecRestoreArray(mumps->x_seq, &array)); 2581 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2582 } 2583 PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2))); 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 /* Sets MUMPS options from the options database */ 2588 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2589 { 2590 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2591 PetscReal cntl; 2592 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2593 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2594 PetscBool flg = PETSC_FALSE; 2595 PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1); 2596 void *arr; 2597 2598 PetscFunctionBegin; 2599 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2600 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2601 PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE; 2602 PetscInt nthreads = 0; 2603 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2604 PetscInt nICNTL_pre = mumps->ICNTL_pre ? 
mumps->ICNTL_pre[0] : 0; 2605 PetscMUMPSInt nblk, *blkvar, *blkptr; 2606 2607 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2608 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2609 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2610 2611 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2612 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2613 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2614 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2615 if (mumps->use_petsc_omp_support) { 2616 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2617 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2618 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2619 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2620 #else 2621 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2622 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2623 #endif 2624 } else { 2625 mumps->omp_comm = PETSC_COMM_SELF; 2626 mumps->mumps_comm = mumps->petsc_comm; 2627 mumps->is_omp_master = PETSC_TRUE; 2628 } 2629 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2630 mumps->reqs = NULL; 2631 mumps->tag = 0; 2632 2633 if (mumps->mumps_comm != MPI_COMM_NULL) { 2634 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2635 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2636 MPI_Comm comm; 2637 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2638 mumps->mumps_comm = comm; 2639 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2640 } 2641 2642 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2643 mumps->id.job = JOB_INIT; 2644 mumps->id.par = 1; /* host participates factorizaton and solve */ 2645 mumps->id.sym = mumps->sym; 2646 2647 size = mumps->id.size_schur; 2648 arr = mumps->id.schur; 2649 listvar_schur = mumps->id.listvar_schur; 2650 nblk = mumps->id.nblk; 2651 blkvar = mumps->id.blkvar; 2652 blkptr = mumps->id.blkptr; 2653 if (PetscDefined(USE_DEBUG)) { 2654 for (PetscInt i = 0; i < size; i++) 2655 PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! 
%" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, 2656 A->rmap->N); 2657 } 2658 2659 PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL)); 2660 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]); 2661 PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar"); 2662 PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision)); 2663 2664 PetscMUMPS_c(mumps); 2665 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2666 2667 /* set PETSc-MUMPS default options - override MUMPS default */ 2668 mumps->id.ICNTL(3) = 0; 2669 mumps->id.ICNTL(4) = 0; 2670 if (mumps->petsc_size == 1) { 2671 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2672 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2673 } else { 2674 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2675 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2676 } 2677 if (nblk && blkptr) { 2678 mumps->id.ICNTL(15) = 1; 2679 mumps->id.nblk = nblk; 2680 mumps->id.blkvar = blkvar; 2681 mumps->id.blkptr = blkptr; 2682 } else mumps->id.ICNTL(15) = 0; 2683 2684 /* restore cached ICNTL and CNTL values */ 2685 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2686 for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]); 2687 2688 PetscCall(PetscFree(mumps->ICNTL_pre)); 2689 PetscCall(PetscFree(mumps->CNTL_pre)); 2690 2691 if (schur) { 2692 mumps->id.size_schur = size; 2693 mumps->id.schur_lld = size; 2694 mumps->id.schur = arr; 2695 mumps->id.listvar_schur = listvar_schur; 2696 if (mumps->petsc_size > 1) { 2697 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2698 2699 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2700 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2701 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm)); 2702 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2703 } else { 2704 if (F->factortype == MAT_FACTOR_LU) { 2705 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2706 } else { 2707 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2708 } 2709 } 2710 mumps->id.ICNTL(26) = -1; 2711 } 2712 2713 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2714 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 
    */
    PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm));
    PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm));

    mumps->scat_rhs = NULL;
    mumps->scat_sol = NULL;
  }
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg));
  if (flg) mumps->id.ICNTL(1) = icntl;
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg));
  if (flg) mumps->id.ICNTL(2) = icntl;
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg));
  if (flg) mumps->id.ICNTL(3) = icntl;

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg));
  if (flg) mumps->id.ICNTL(4) = icntl;
  if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* restore the MUMPS default id.ICNTL(3) = 6 */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg));
  if (flg) mumps->id.ICNTL(6) = icntl;

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg));
  if (flg) {
    PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto");
    mumps->id.ICNTL(7) = icntl;
  }

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL));
  /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL));
  PetscCall(MatGetBlockSizes(A, &rbs, &cbs));
  if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs;
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg));
  if (flg) {
    if (mumps->id.ICNTL(15) < 0)
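      /* a negative ICNTL(15) encodes a uniform block size of -ICNTL(15) (set by default from the Mat block size above), hence the divisibility check */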
      PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The negative of -mat_mumps_icntl_15 must be a multiple of the row and column block sizes");
    else if (mumps->id.ICNTL(15) > 0) {
      const PetscInt *bsizes;
      PetscInt nblocks, p, *blkptr = NULL;
      PetscMPIInt *recvcounts, *displs, n;
      PetscMPIInt rank, size = 0;

      PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes));
      flg = PETSC_TRUE;
      for (p = 0; p < nblocks; ++p) {
        if (bsizes[p] > 1) break;
      }
      if (p == nblocks) flg = PETSC_FALSE;
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
      if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1
        PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));
        if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
        PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs));
        PetscCall(PetscMPIIntCast(nblocks, &n));
        PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A)));
        for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p];
        PetscCall(PetscMalloc1(displs[size] + 1, &blkptr));
        PetscCallMPI(MPI_Bcast(displs + size, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); /* displs[] holds PetscMPIInt, so broadcast the total block count as MPI_INT */
        PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A)));
        if (rank == 0) {
          blkptr[0] = 1;
          for (PetscInt p = 0; p < displs[size]; ++p) blkptr[p + 1] += blkptr[p]; /* turn the gathered block sizes into 1-based block pointers */
          PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr));
        }
        PetscCall(PetscFree2(recvcounts, displs));
        PetscCall(PetscFree(blkptr));
      }
    }
  }
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL));
  if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset Schur data, if any */
    PetscCall(MatDestroy(&F->schur));
    PetscCall(MatMumpsResetSchur_Private(mumps));
  }

  /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps",
     and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran
     and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...'
     This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug,
     see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590.
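     Note that MUMPS itself is implemented in Fortran, so these Fortran-binding bugs affect it even though PETSc calls MPI from C.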
     In short, we could not use distributed RHS with MPICH until v4.0b1, unless the workaround available in MUMPS 5.6.2+ was enabled.
  */
  mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */
#if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI)
  mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */
#endif
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg));
  PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20);
#if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0)
  PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0");
#endif
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can be allocated per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL));
  if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn off ScaLAPACK to help with the correct detection of null pivots */

  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix has been computed", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL));
  /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported
     by the PETSc API */
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL));
  PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL));

  PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 1, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 2, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 3, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 4, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 5, cntl);
  PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg));
  if (flg) ID_CNTL_SET(mumps->id, 7, cntl);

  PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL));

  PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL));
  if (ninfo) {
    PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must be <= 80", ninfo);
    PetscCall(PetscMalloc1(ninfo, &mumps->info));
    mumps->ninfo = ninfo;
    for (i = 0; i < ninfo; i++) {
      PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must be between 0 and 80", info[i]);
      mumps->info[i] = info[i];
    }
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo
*info, Mat_MUMPS *mumps) 2863 { 2864 PetscFunctionBegin; 2865 if (mumps->id.INFOG(1) < 0) { 2866 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2867 if (mumps->id.INFOG(1) == -6) { 2868 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2869 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2870 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2871 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2872 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2873 } else { 2874 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2875 F->factorerrortype = MAT_FACTOR_OTHER; 2876 } 2877 } 2878 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2879 PetscFunctionReturn(PETSC_SUCCESS); 2880 } 2881 2882 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2883 { 2884 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2885 Vec b; 2886 const PetscInt M = A->rmap->N; 2887 2888 PetscFunctionBegin; 2889 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2890 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2891 PetscFunctionReturn(PETSC_SUCCESS); 2892 } 2893 2894 /* Set MUMPS options from the options database */ 2895 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2896 2897 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2898 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2899 2900 /* analysis phase */ 2901 mumps->id.job = JOB_FACTSYMBOLIC; 2902 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2903 switch (mumps->id.ICNTL(18)) { 2904 case 0: /* centralized assembled matrix input */ 2905 if (!mumps->myid) { 2906 mumps->id.nnz = mumps->nnz; 2907 mumps->id.irn = mumps->irn; 2908 mumps->id.jcn = mumps->jcn; 2909 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2910 if (r && mumps->id.ICNTL(7) == 7) { 2911 mumps->id.ICNTL(7) = 1; 2912 if (!mumps->myid) { 2913 const PetscInt *idx; 2914 PetscInt i; 2915 2916 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2917 PetscCall(ISGetIndices(r, &idx)); 2918 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */ 2919 PetscCall(ISRestoreIndices(r, &idx)); 2920 } 2921 } 2922 } 2923 break; 2924 case 3: /* distributed assembled matrix input (size>1) */ 2925 mumps->id.nnz_loc = mumps->nnz; 2926 mumps->id.irn_loc = mumps->irn; 2927 mumps->id.jcn_loc = mumps->jcn; 2928 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2929 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
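       With ICNTL(20)=0 MUMPS reads the dense right-hand side only on the host, so at solve time the distributed vector is first gathered onto rank 0;
       a sketch of what MatSolve_MUMPS then does: VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD) followed by VecScatterEnd().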
Create scatter scat_rhs for repeated use in MatSolve() */ 2930 PetscCall(MatCreateVecs(A, NULL, &b)); 2931 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2932 PetscCall(VecDestroy(&b)); 2933 } 2934 break; 2935 } 2936 PetscMUMPS_c(mumps); 2937 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2938 2939 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2940 F->ops->solve = MatSolve_MUMPS; 2941 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2942 F->ops->matsolve = MatMatSolve_MUMPS; 2943 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2944 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2945 2946 mumps->matstruc = SAME_NONZERO_PATTERN; 2947 PetscFunctionReturn(PETSC_SUCCESS); 2948 } 2949 2950 /* Note the PETSc r and c permutations are ignored */ 2951 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2952 { 2953 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2954 Vec b; 2955 const PetscInt M = A->rmap->N; 2956 2957 PetscFunctionBegin; 2958 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2959 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2960 PetscFunctionReturn(PETSC_SUCCESS); 2961 } 2962 2963 /* Set MUMPS options from the options database */ 2964 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2965 2966 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2967 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2968 2969 /* analysis phase */ 2970 mumps->id.job = JOB_FACTSYMBOLIC; 2971 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2972 switch (mumps->id.ICNTL(18)) { 2973 case 0: /* centralized assembled matrix input */ 2974 if (!mumps->myid) { 2975 mumps->id.nnz = mumps->nnz; 2976 mumps->id.irn = mumps->irn; 2977 mumps->id.jcn = mumps->jcn; 2978 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2979 } 2980 break; 2981 case 3: /* distributed assembled matrix input (size>1) */ 2982 mumps->id.nnz_loc = mumps->nnz; 2983 mumps->id.irn_loc = mumps->irn; 2984 mumps->id.jcn_loc = mumps->jcn; 2985 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2986 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 2987 PetscCall(MatCreateVecs(A, NULL, &b)); 2988 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2989 PetscCall(VecDestroy(&b)); 2990 } 2991 break; 2992 } 2993 PetscMUMPS_c(mumps); 2994 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2995 2996 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2997 F->ops->solve = MatSolve_MUMPS; 2998 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2999 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3000 3001 mumps->matstruc = SAME_NONZERO_PATTERN; 3002 PetscFunctionReturn(PETSC_SUCCESS); 3003 } 3004 3005 /* Note the PETSc r permutation and factor info are ignored */ 3006 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 3007 { 3008 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3009 Vec b; 3010 const PetscInt M = A->rmap->N; 3011 3012 PetscFunctionBegin; 3013 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 3014 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 3015 PetscFunctionReturn(PETSC_SUCCESS); 3016 } 3017 3018 /* Set MUMPS options from the options database */ 3019 PetscCall(MatSetFromOptions_MUMPS(F, A)); 3020 3021 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 3022 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 3023 3024 /* analysis phase */ 3025 mumps->id.job = JOB_FACTSYMBOLIC; 3026 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 3027 switch (mumps->id.ICNTL(18)) { 3028 case 0: /* centralized assembled matrix input */ 3029 if (!mumps->myid) { 3030 mumps->id.nnz = mumps->nnz; 3031 mumps->id.irn = mumps->irn; 3032 mumps->id.jcn = mumps->jcn; 3033 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 3034 } 3035 break; 3036 case 3: /* distributed assembled matrix input (size>1) */ 3037 mumps->id.nnz_loc = mumps->nnz; 3038 mumps->id.irn_loc = mumps->irn; 3039 mumps->id.jcn_loc = mumps->jcn; 3040 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 3041 if (mumps->ICNTL20 == 0) { /* Centralized rhs. 
Create scatter scat_rhs for repeated use in MatSolve() */ 3042 PetscCall(MatCreateVecs(A, NULL, &b)); 3043 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 3044 PetscCall(VecDestroy(&b)); 3045 } 3046 break; 3047 } 3048 PetscMUMPS_c(mumps); 3049 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 3050 3051 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 3052 F->ops->solve = MatSolve_MUMPS; 3053 F->ops->solvetranspose = MatSolve_MUMPS; 3054 F->ops->matsolve = MatMatSolve_MUMPS; 3055 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 3056 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3057 #if defined(PETSC_USE_COMPLEX) 3058 F->ops->getinertia = NULL; 3059 #else 3060 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 3061 #endif 3062 3063 mumps->matstruc = SAME_NONZERO_PATTERN; 3064 PetscFunctionReturn(PETSC_SUCCESS); 3065 } 3066 3067 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 3068 { 3069 PetscBool isascii; 3070 PetscViewerFormat format; 3071 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3072 3073 PetscFunctionBegin; 3074 /* check if matrix is mumps type */ 3075 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 3076 3077 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 3078 if (isascii) { 3079 PetscCall(PetscViewerGetFormat(viewer, &format)); 3080 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3081 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 3082 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3083 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 3084 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 3085 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 3086 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 3087 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 3088 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 3089 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 3090 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 3091 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 3092 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 3093 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 3094 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 3095 if (mumps->id.ICNTL(11) > 0) { 3096 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4))); 3097 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5))); 3098 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6))); 3099 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8))); 3100 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error 
estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9)));
          PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11)));
        }
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15)));
        /* ICNTL(16-17) not used */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(23) (max size of memory that can be allocated locally): %d\n", mumps->id.ICNTL(23)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(31) (factors are discarded in the solve phase): %d\n", mumps->id.ICNTL(31)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(56) (postponing and rank-revealing factorization): %d\n", mumps->id.ICNTL(56)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7)));

        /* information local to each processor */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(1) (local estimated flops for the elimination after analysis):\n"));
        PetscCall(PetscViewerASCIIPushSynchronized(viewer));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(2) (local estimated flops for the assembly after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2)));
        PetscCall(PetscViewerFlush(viewer));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFO(3) (local estimated flops for the elimination after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(15) (estimated size (in MB) of MUMPS internal data for running numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] %d\n", mumps->myid, mumps->id.INFO(15)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(16) (size (in MB) of MUMPS internal data used during numerical factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] %d\n", mumps->myid, mumps->id.INFO(16)));
        PetscCall(PetscViewerFlush(viewer));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(23) (num of pivots eliminated on this processor after factorization):\n"));
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] %d\n", mumps->myid, mumps->id.INFO(23)));
        PetscCall(PetscViewerFlush(viewer));

        if (mumps->ninfo && mumps->ninfo <= 80) {
          PetscInt i;
          for (i = 0; i < mumps->ninfo; i++) {
            PetscCall(PetscViewerASCIIPrintf(viewer, "  INFO(%" PetscInt_FMT "):\n", mumps->info[i]));
            PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i])));
            PetscCall(PetscViewerFlush(viewer));
          }
        }
        PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      } else PetscCall(PetscViewerASCIIPrintf(viewer, "  Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : ""));

      if (mumps->myid == 0) { /* information from the host */
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34)));

        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(10) (total integer space to store the matrix factors after factorization): %d\n", mumps->id.INFOG(10)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(14) (number of memory compresses after factorization): %d\n", mumps->id.INFOG(14)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "  INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19)));
        PetscCall(PetscViewerASCIIPrintf(viewer, "
INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 3199 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 3200 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 3201 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 3202 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 3203 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 3204 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 3205 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 3206 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 3207 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 3208 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 3209 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 3210 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 3211 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 3212 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 3213 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 3214 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 3215 } 3216 } 3217 } 3218 PetscFunctionReturn(PETSC_SUCCESS); 3219 } 3220 3221 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 3222 { 3223 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3224 3225 PetscFunctionBegin; 3226 info->block_size = 1.0; 3227 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 3228 info->nz_used = mumps->id.INFOG(20) >= 0 ? 
mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20);
  info->nz_unneeded = 0.0;
  info->assemblies = 0.0;
  info->mallocs = 0.0;
  info->memory = 0.0;
  info->fill_ratio_given = 0;
  info->fill_ratio_needed = 0;
  info->factor_mallocs = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
  const PetscScalar *arr;
  const PetscInt *idxs;
  PetscInt size, i;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(is, &size));
  /* Schur complement matrix */
  PetscCall(MatDestroy(&F->schur));
  PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur));
  PetscCall(MatDenseGetArrayRead(F->schur, &arr));
  // don't allocate mumps->id.schur[] now as its precision is not yet known
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur));
  PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld));
  PetscCall(MatDenseRestoreArrayRead(F->schur, &arr));
  if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE));

  /* MUMPS expects Fortran-style indices */
  PetscCall(PetscFree(mumps->id.listvar_schur));
  PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur));
  PetscCall(ISGetIndices(is, &idxs));
  for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i]));
  PetscCall(ISRestoreIndices(is, &idxs));
  /* set a special value of ICNTL (not handled by MUMPS) to be used in the solve phase by PETSc */
  if (mumps->id.icntl) mumps->id.ICNTL(26) = -1;
  else mumps->ICNTL26 = -1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S)
{
  Mat St;
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *array;
  PetscInt i, j, N = mumps->id.size_schur;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it");
  PetscCall(MatCreate(PETSC_COMM_SELF, &St));
  PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur));
  PetscCall(MatSetType(St, MATDENSE));
  PetscCall(MatSetUp(St));
  PetscCall(MatDenseGetArray(St, &array));
  if (!mumps->sym) {                /* MUMPS always returns a full matrix */
    if (mumps->id.ICNTL(19) == 1) { /* stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else { /* stored by columns */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    }
  } else {                          /* either full or lower-triangular (not packed) */
    if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */
      for (i = 0; i < N; i++) {
        for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */
      PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array));
    } else { /* ICNTL(19) == 1 lower triangular stored by rows */
      for (i = 0; i < N; i++) {
        for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j);
      }
    }
  }
  PetscCall(MatDenseRestoreArray(St, &array));
  *S = St;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */
    PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */
    for (i = 0; i < nICNTL_pre; ++i)
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */
    if (i == nICNTL_pre) { /* not already cached */
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre));
      mumps->ICNTL_pre[0]++;
    }
    mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl;
    PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i));
  } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0;
    *ival = 0;
    for (i = 0; i < nICNTL_pre; ++i) {
      if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i];
    }
  } else *ival = mumps->id.ICNTL(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
. icntl - index of MUMPS parameter array `ICNTL()`
- ival - value of MUMPS `ICNTL(icntl)`

  Options Database Key:
. -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival`
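
  Example Usage:
  A minimal sketch (assuming `A` is an already assembled matrix); for instance, selecting the METIS ordering, `ICNTL(7)=5`, before factoring:
.vb
  Mat F;

  MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F);
  MatMumpsSetIcntl(F, 7, 5); // ICNTL(7)=5 requests the METIS ordering during analysis
  // ... then perform the symbolic and numeric factorizations and the solves as usual
.ve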
  Level: beginner

  Note:
  Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveInt(F, ival, 3);
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array ICNTL()

  Output Parameter:
. ival - value of MUMPS ICNTL(icntl)

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(ival, 3);
  PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    for (i = 0; i < nCNTL_pre; ++i)
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) break;
    if (i == nCNTL_pre) {
      if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre));
      else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre));
      mumps->CNTL_pre[0]++;
    }
    mumps->CNTL_pre[1 + 2 * i] = icntl;
    mumps->CNTL_pre[2 + 2 * i] = val;
  } else ID_CNTL_SET(mumps->id, icntl, val);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  if (mumps->id.job == JOB_NULL) {
    PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0;
    *val = 0.0;
    for (i = 0; i < nCNTL_pre; ++i) {
      if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i];
    }
  } else *val = ID_CNTL_GET(mumps->id, icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
. icntl - index of MUMPS parameter array `CNTL()`
- val - value of MUMPS `CNTL(icntl)`

  Options Database Key:
. -mat_mumps_cntl_<icntl> <val> - change the option numbered `icntl` to `val`

  Level: beginner

  Note:
  Ignored if MUMPS is not installed or `F` is not a MUMPS matrix

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscValidLogicalCollectiveReal(F, val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc>

  Logically Collective

  Input Parameters:
+ F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY`
- icntl - index of MUMPS parameter array CNTL()

  Output Parameter:
. val - value of MUMPS CNTL(icntl)
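
  Example Usage:
  A minimal sketch (assuming `F` is a MUMPS factored matrix); e.g., reading back the relative pivoting threshold `CNTL(1)`:
.vb
  PetscReal relpiv;

  MatMumpsGetCntl(F, 1, &relpiv); // CNTL(1) is the relative pivoting threshold
.ve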
  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`
@*/
PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val)
{
  PetscFunctionBegin;
  PetscValidType(F, 1);
  PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix");
  PetscValidLogicalCollectiveInt(F, icntl, 2);
  PetscAssertPointer(val, 3);
  PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl);
  PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *info = mumps->id.INFO(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *infog = mumps->id.INFOG(icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfo = ID_RINFO_GET(mumps->id, icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  *rinfog = ID_RINFOG_GET(mumps->id, icntl);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array)
{
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;

  PetscFunctionBegin;
  PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set to 1 for null pivot row detection");
  *size = 0;
  *array = NULL;
  if (!mumps->myid) {
    *size = mumps->id.INFOG(28);
    PetscCall(PetscMalloc1(*size, array));
    for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS)
{
  Mat Bt = NULL, Btseq = NULL;
  PetscBool flg;
  Mat_MUMPS *mumps = (Mat_MUMPS *)F->data;
  PetscScalar *aa;
  PetscInt spnr, *ia, *ja, M, nrhs;

  PetscFunctionBegin;
  PetscAssertPointer(spRHS, 2);
  PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be of type MATTRANSPOSEVIRTUAL");
  PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED));
  PetscCall(MatTransposeGetMat(spRHS, &Bt));

  PetscCall(MatMumpsSetIcntl(F, 30, 1));

  if (mumps->petsc_size > 1)
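  /* in parallel, the sparse RHS pattern is taken from the local sequential (diagonal) block of the MPIAIJ matrix; only rank 0 passes it to MUMPS below */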
{ 3570 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3571 Btseq = b->A; 3572 } else { 3573 Btseq = Bt; 3574 } 3575 3576 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3577 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 3578 PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3579 mumps->id.rhs = NULL; 3580 3581 if (!mumps->myid) { 3582 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3583 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3584 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3585 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3586 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 3587 } else { 3588 mumps->id.irhs_ptr = NULL; 3589 mumps->id.irhs_sparse = NULL; 3590 mumps->id.nz_rhs = 0; 3591 if (mumps->id.rhs_sparse_len) { 3592 PetscCall(PetscFree(mumps->id.rhs_sparse)); 3593 mumps->id.rhs_sparse_len = 0; 3594 } 3595 } 3596 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3597 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3598 3599 /* solve phase */ 3600 mumps->id.job = JOB_SOLVE; 3601 PetscMUMPS_c(mumps); 3602 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3603 3604 if (!mumps->myid) { 3605 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3606 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3607 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3608 } 3609 PetscFunctionReturn(PETSC_SUCCESS); 3610 } 3611 3612 /*@ 3613 MatMumpsGetInverse - Get a user-specified set of entries in the inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3614 3615 Logically Collective 3616 3617 Input Parameter: 3618 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3619 3620 Output Parameter: 3621 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with the requested entries of the inverse of `A` 3622 3623 Level: beginner 3624 3625 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3626 @*/ 3627 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3628 { 3629 PetscFunctionBegin; 3630 PetscValidType(F, 1); 3631 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3632 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3633 PetscFunctionReturn(PETSC_SUCCESS); 3634 } 3635 3636 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3637 { 3638 Mat spRHS; 3639 3640 PetscFunctionBegin; 3641 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3642 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3643 PetscCall(MatDestroy(&spRHS)); 3644 PetscFunctionReturn(PETSC_SUCCESS); 3645 } 3646 3647 /*@ 3648 MatMumpsGetInverseTranspose - Get a user-specified set of entries in the inverse of the matrix $A^T$ <https://mumps-solver.org/index.php?page=doc> 3649 3650 Logically Collective 3651 3652 Input Parameter: 3653 .
F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3654 3655 Output Parameter: 3656 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of the inverse of $A^T$ 3657 3658 Level: beginner 3659 3660 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3661 @*/ 3662 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3663 { 3664 PetscBool flg; 3665 3666 PetscFunctionBegin; 3667 PetscValidType(F, 1); 3668 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3669 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3670 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be a MATAIJ matrix"); 3671 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3672 PetscFunctionReturn(PETSC_SUCCESS); 3673 } 3674 3675 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3676 { 3677 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3678 3679 PetscFunctionBegin; 3680 if (nblk) { 3681 PetscAssertPointer(blkptr, 4); 3682 PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 3683 PetscCall(PetscFree(mumps->id.blkptr)); 3684 PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 3685 for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3686 // mumps->id.icntl[] might not have been allocated yet; that is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15) here. 3687 // We use id.nblk and id.blkptr to know what value to set ICNTL(15) to in MatSetFromOptions_MUMPS(). 3688 // mumps->id.ICNTL(15) = 1; 3689 if (blkvar) { 3690 PetscCall(PetscFree(mumps->id.blkvar)); 3691 PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 3692 for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 3693 } 3694 } else { 3695 PetscCall(PetscFree(mumps->id.blkptr)); 3696 PetscCall(PetscFree(mumps->id.blkvar)); 3697 // mumps->id.ICNTL(15) = 0; 3698 mumps->id.nblk = 0; 3699 } 3700 PetscFunctionReturn(PETSC_SUCCESS); 3701 } 3702 3703 /*@ 3704 MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 3705 3706 Not collective; only relevant on the first process of the MPI communicator 3707 3708 Input Parameters: 3709 + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3710 . nblk - the number of blocks 3711 .
blkvar - see the MUMPS documentation; `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated with block `iblk` 3712 - blkptr - array of size `nblk + 1`, starting at 1, storing the prefix sum of the block sizes 3713 3714 Level: advanced 3715 3716 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 3717 @*/ 3718 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3719 { 3720 PetscFunctionBegin; 3721 PetscValidType(F, 1); 3722 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3723 PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 3724 PetscFunctionReturn(PETSC_SUCCESS); 3725 } 3726 3727 /*@ 3728 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3729 3730 Logically Collective 3731 3732 Input Parameters: 3733 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3734 - icntl - index of MUMPS parameter array INFO() 3735 3736 Output Parameter: 3737 . ival - value of MUMPS INFO(icntl) 3738 3739 Level: beginner 3740 3741 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3742 @*/ 3743 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3744 { 3745 PetscFunctionBegin; 3746 PetscValidType(F, 1); 3747 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3748 PetscAssertPointer(ival, 3); 3749 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3750 PetscFunctionReturn(PETSC_SUCCESS); 3751 } 3752 3753 /*@ 3754 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3755 3756 Logically Collective 3757 3758 Input Parameters: 3759 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3760 - icntl - index of MUMPS parameter array INFOG() 3761 3762 Output Parameter: 3763 .
ival - value of MUMPS INFOG(icntl) 3764 3765 Level: beginner 3766 3767 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3768 @*/ 3769 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3770 { 3771 PetscFunctionBegin; 3772 PetscValidType(F, 1); 3773 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3774 PetscAssertPointer(ival, 3); 3775 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3776 PetscFunctionReturn(PETSC_SUCCESS); 3777 } 3778 3779 /*@ 3780 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3781 3782 Logically Collective 3783 3784 Input Parameters: 3785 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3786 - icntl - index of MUMPS parameter array RINFO() 3787 3788 Output Parameter: 3789 . val - value of MUMPS RINFO(icntl) 3790 3791 Level: beginner 3792 3793 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3794 @*/ 3795 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3796 { 3797 PetscFunctionBegin; 3798 PetscValidType(F, 1); 3799 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3800 PetscAssertPointer(val, 3); 3801 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3802 PetscFunctionReturn(PETSC_SUCCESS); 3803 } 3804 3805 /*@ 3806 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3807 3808 Logically Collective 3809 3810 Input Parameters: 3811 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3812 - icntl - index of MUMPS parameter array RINFOG() 3813 3814 Output Parameter: 3815 . val - value of MUMPS RINFOG(icntl) 3816 3817 Level: beginner 3818 3819 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3820 @*/ 3821 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3822 { 3823 PetscFunctionBegin; 3824 PetscValidType(F, 1); 3825 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3826 PetscAssertPointer(val, 3); 3827 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3828 PetscFunctionReturn(PETSC_SUCCESS); 3829 } 3830 3831 /*@ 3832 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3833 3834 Logically Collective 3835 3836 Input Parameter: 3837 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3838 3839 Output Parameters: 3840 + size - local size of the array. 
The size of the array is non-zero only on MPI rank 0 3841 - array - array of rows with null pivots; the rows use 0-based indexing. The array is allocated within the function and the user is responsible 3842 for freeing it with `PetscFree()`. 3843 3844 Level: beginner 3845 3846 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3847 @*/ 3848 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3849 { 3850 PetscFunctionBegin; 3851 PetscValidType(F, 1); 3852 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3853 PetscAssertPointer(size, 2); 3854 PetscAssertPointer(array, 3); 3855 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3856 PetscFunctionReturn(PETSC_SUCCESS); 3857 } 3858 3859 /*MC 3860 MATSOLVERMUMPS - A matrix solver type providing direct solvers (LU and Cholesky) for 3861 MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 3862 3863 Works with `MATAIJ`, `MATSBAIJ`, `MATBAIJ`, `MATSELL`, `MATDENSE`, `MATDIAGONAL`, and `MATNEST` matrices (`MATBAIJ` and `MATSELL` only with LU) 3864 3865 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3866 3867 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3868 See details below. 3869 3870 Use `-pc_type cholesky` or `-pc_type lu` together with `-pc_factor_mat_solver_type mumps` to use this direct solver 3871 3872 Options Database Keys: 3873 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3874 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warnings 3875 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3876 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3877 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scales the matrix (0 to 7) 3878 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3879 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3880 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3881 . -mat_mumps_icntl_10 - ICNTL(10): maximum number of iterative refinement steps 3882 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3883 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3884 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3885 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3886 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3887 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3888 . -mat_mumps_icntl_20 - ICNTL(20): gives MUMPS a centralized (0) or distributed (10) dense RHS 3889 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3890 . -mat_mumps_icntl_23 - ICNTL(23): maximum size of the working memory (MB) that MUMPS can allocate per processor 3891 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3892 .
-mat_mumps_icntl_25 - ICNTL(25): computes a solution of a deficient matrix and a null space basis 3893 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix has been computed 3894 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3895 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3896 . -mat_mumps_icntl_30 - ICNTL(30): computes a user-specified set of entries in inv(A) 3897 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3898 . -mat_mumps_icntl_33 - ICNTL(33): computes the determinant 3899 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of the BLR (Block Low-Rank) feature 3900 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3901 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3902 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3903 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3904 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3905 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3906 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion for iterative refinement 3907 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3908 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3909 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3910 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3911 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) had been called before calling MUMPS. 3912 The default for m is the number of cores per CPU package (socket), as reported by hwloc and suggested by the MUMPS manual. 3913 3914 Level: beginner 3915 3916 Notes: 3917 MUMPS Cholesky does not handle (complex) Hermitian matrices (see the User's Guide at <https://mumps-solver.org/index.php?page=doc>), so using it will 3918 generate an error if the matrix is Hermitian. 3919 3920 When used within a `KSP`/`PC` solve, the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3921 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3922 3923 When a MUMPS factorization fails inside a KSP solve, for example with `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3924 the failure with 3925 .vb 3926 KSPGetPC(ksp,&pc); 3927 PCFactorGetMatrix(pc,&mat); 3928 MatMumpsGetInfo(mat,....); 3929 MatMumpsGetInfog(mat,....); etc. 3930 .ve 3931 Or run with `-ksp_error_if_not_converged`, and the program will stop and print the information in the error message. 3932 3933 MUMPS provides 64-bit integer support in two build modes: 3934 full 64-bit: here MUMPS is built with the C preprocessing flag -DINTSIZE64 and the Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3935 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3936 3937 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3938 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have fewer than 2^31 rows and 3939 columns.
This can lead to significant memory and performance gains compared to a full 64-bit integer MUMPS version. It requires a regular (32-bit 3940 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3941 3942 With --download-mumps=1, PETSc always builds MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3943 3944 There are two modes to run MUMPS/PETSc with OpenMP 3945 .vb 3946 Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3947 threads per rank, then you may use "export `OMP_NUM_THREADS`=16 && mpirun -n 4 ./test". 3948 .ve 3949 3950 .vb 3951 Use `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example, 3952 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3953 .ve 3954 3955 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3956 (i.e., the PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3957 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3958 libraries, to get good performance you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3959 (PETSc will automatically try to utilize a threaded BLAS if `--with-openmp` is provided). 3960 3961 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain the MPI 3962 processes on each compute node. Listing the processes in ascending rank order, we split the processes on a node into consecutive groups of 3963 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and the others in the omp_comm 3964 are called slave ranks (or slaves). Only master ranks are visible to MUMPS; slaves are not. We free the CPUs assigned to slaves (which might be set 3965 by CPU binding policies in job scripts) and make those CPUs available to the master so that the OMP threads spawned by MUMPS can run on them. 3966 On a multi-socket compute node, MPI rank mapping is an issue. Continuing the above example, suppose your compute node has two sockets: 3967 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0 and odd ranks on socket 1, and bind 3968 MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16 a master rank (and the threads it spawns) will use half the cores in socket 0 and half 3969 the cores in socket 1, which definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, the 3970 problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 3971 For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 3972 examine the mapping result. 3973 3974 PETSc does not control thread binding in MUMPS. So to get the best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3975 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread.
One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3976 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3977 3978 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3979 3980 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3981 M*/ 3982 3983 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3984 { 3985 PetscFunctionBegin; 3986 *type = MATSOLVERMUMPS; 3987 PetscFunctionReturn(PETSC_SUCCESS); 3988 } 3989 3990 /* MatGetFactor for Seq and MPI AIJ matrices */ 3991 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 3992 { 3993 Mat B; 3994 Mat_MUMPS *mumps; 3995 PetscBool isSeqAIJ, isDiag, isDense; 3996 PetscMPIInt size; 3997 3998 PetscFunctionBegin; 3999 #if defined(PETSC_USE_COMPLEX) 4000 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4001 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4002 *F = NULL; 4003 PetscFunctionReturn(PETSC_SUCCESS); 4004 } 4005 #endif 4006 /* Create the factorization matrix */ 4007 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 4008 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 4009 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4010 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4011 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4012 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4013 PetscCall(MatSetUp(B)); 4014 4015 PetscCall(PetscNew(&mumps)); 4016 4017 B->ops->view = MatView_MUMPS; 4018 B->ops->getinfo = MatGetInfo_MUMPS; 4019 4020 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4021 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4022 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4023 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4024 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4025 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4026 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4027 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4028 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4029 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4030 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4031 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4032 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4033 PetscCall(PetscObjectComposeFunction((PetscObject)B, 
"MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4034 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4035 4036 if (ftype == MAT_FACTOR_LU) { 4037 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4038 B->factortype = MAT_FACTOR_LU; 4039 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 4040 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4041 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4042 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 4043 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4044 mumps->sym = 0; 4045 } else { 4046 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4047 B->factortype = MAT_FACTOR_CHOLESKY; 4048 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 4049 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4050 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4051 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 4052 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4053 #if defined(PETSC_USE_COMPLEX) 4054 mumps->sym = 2; 4055 #else 4056 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4057 else mumps->sym = 2; 4058 #endif 4059 } 4060 4061 /* set solvertype */ 4062 PetscCall(PetscFree(B->solvertype)); 4063 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4064 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4065 if (size == 1) { 4066 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4067 B->canuseordering = PETSC_TRUE; 4068 } 4069 B->ops->destroy = MatDestroy_MUMPS; 4070 B->data = (void *)mumps; 4071 4072 *F = B; 4073 mumps->id.job = JOB_NULL; 4074 mumps->ICNTL_pre = NULL; 4075 mumps->CNTL_pre = NULL; 4076 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4077 PetscFunctionReturn(PETSC_SUCCESS); 4078 } 4079 4080 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 4081 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 4082 { 4083 Mat B; 4084 Mat_MUMPS *mumps; 4085 PetscBool isSeqSBAIJ; 4086 PetscMPIInt size; 4087 4088 PetscFunctionBegin; 4089 #if defined(PETSC_USE_COMPLEX) 4090 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4091 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. 
Use MAT_FACTOR_LU instead.\n")); 4092 *F = NULL; 4093 PetscFunctionReturn(PETSC_SUCCESS); 4094 } 4095 #endif 4096 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4097 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4098 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4099 PetscCall(MatSetUp(B)); 4100 4101 PetscCall(PetscNew(&mumps)); 4102 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 4103 if (isSeqSBAIJ) { 4104 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 4105 } else { 4106 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 4107 } 4108 4109 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4110 B->ops->view = MatView_MUMPS; 4111 B->ops->getinfo = MatGetInfo_MUMPS; 4112 4113 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4114 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4115 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4116 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4117 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4118 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4119 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4120 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4121 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4122 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4123 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4124 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4125 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4126 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4127 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4128 4129 B->factortype = MAT_FACTOR_CHOLESKY; 4130 #if defined(PETSC_USE_COMPLEX) 4131 mumps->sym = 2; 4132 #else 4133 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4134 else mumps->sym = 2; 4135 #endif 4136 4137 /* set solvertype */ 4138 PetscCall(PetscFree(B->solvertype)); 4139 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4140 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4141 if (size == 1) { 4142 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4143 B->canuseordering = PETSC_TRUE; 4144 } 4145 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4146 B->ops->destroy = MatDestroy_MUMPS; 4147 B->data = (void *)mumps; 4148 4149 *F = B; 4150 mumps->id.job = JOB_NULL; 4151 mumps->ICNTL_pre = NULL; 4152 mumps->CNTL_pre = NULL; 4153 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4154 PetscFunctionReturn(PETSC_SUCCESS); 4155 } 4156 4157 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType 
ftype, Mat *F) 4158 { 4159 Mat B; 4160 Mat_MUMPS *mumps; 4161 PetscBool isSeqBAIJ; 4162 PetscMPIInt size; 4163 4164 PetscFunctionBegin; 4165 /* Create the factorization matrix */ 4166 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 4167 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4168 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4169 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4170 PetscCall(MatSetUp(B)); 4171 4172 PetscCall(PetscNew(&mumps)); 4173 PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 4174 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 4175 B->factortype = MAT_FACTOR_LU; 4176 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 4177 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 4178 mumps->sym = 0; 4179 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4180 4181 B->ops->view = MatView_MUMPS; 4182 B->ops->getinfo = MatGetInfo_MUMPS; 4183 4184 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4185 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4186 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4187 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4188 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4189 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4190 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4191 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4192 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4193 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4194 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4195 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4199 4200 /* set solvertype */ 4201 PetscCall(PetscFree(B->solvertype)); 4202 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4203 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4204 if (size == 1) { 4205 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4206 B->canuseordering = PETSC_TRUE; 4207 } 4208 B->ops->destroy = MatDestroy_MUMPS; 4209 B->data = (void *)mumps; 4210 4211 *F = B; 4212 mumps->id.job = JOB_NULL; 4213 mumps->ICNTL_pre = NULL; 4214 mumps->CNTL_pre = NULL; 4215 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4216 PetscFunctionReturn(PETSC_SUCCESS); 4217 } 4218 4219 /* MatGetFactor for Seq and MPI 
SELL matrices */ 4220 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 4221 { 4222 Mat B; 4223 Mat_MUMPS *mumps; 4224 PetscBool isSeqSELL; 4225 PetscMPIInt size; 4226 4227 PetscFunctionBegin; 4228 /* Create the factorization matrix */ 4229 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 4230 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4231 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4232 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4233 PetscCall(MatSetUp(B)); 4234 4235 PetscCall(PetscNew(&mumps)); 4236 4237 B->ops->view = MatView_MUMPS; 4238 B->ops->getinfo = MatGetInfo_MUMPS; 4239 4240 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4241 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4242 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4243 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4244 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4245 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4246 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4247 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4248 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4249 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4250 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4251 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4252 4253 PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4254 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4255 B->factortype = MAT_FACTOR_LU; 4256 PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4257 mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 4258 mumps->sym = 0; 4259 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4260 4261 /* set solvertype */ 4262 PetscCall(PetscFree(B->solvertype)); 4263 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4264 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4265 if (size == 1) { 4266 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4267 B->canuseordering = PETSC_TRUE; 4268 } 4269 B->ops->destroy = MatDestroy_MUMPS; 4270 B->data = (void *)mumps; 4271 4272 *F = B; 4273 mumps->id.job = JOB_NULL; 4274 mumps->ICNTL_pre = NULL; 4275 mumps->CNTL_pre = NULL; 4276 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4277 PetscFunctionReturn(PETSC_SUCCESS); 4278 } 4279 4280 /* MatGetFactor for MATNEST matrices */ 4281 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 4282 { 4283 Mat B, **mats; 4284 Mat_MUMPS *mumps; 4285 PetscInt nr, nc; 4286 PetscMPIInt size; 4287 PetscBool flg = PETSC_TRUE; 4288 4289 PetscFunctionBegin; 4290 #if 
defined(PETSC_USE_COMPLEX) 4291 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4292 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4293 *F = NULL; 4294 PetscFunctionReturn(PETSC_SUCCESS); 4295 } 4296 #endif 4297 4298 /* Return if some condition is not satisfied */ 4299 *F = NULL; 4300 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 4301 if (ftype == MAT_FACTOR_CHOLESKY) { 4302 IS *rows, *cols; 4303 PetscInt *m, *M; 4304 4305 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 4306 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 4307 PetscCall(MatNestGetISs(A, rows, cols)); 4308 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 4309 if (!flg) { 4310 PetscCall(PetscFree2(rows, cols)); 4311 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 4312 PetscFunctionReturn(PETSC_SUCCESS); 4313 } 4314 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 4315 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 4316 for (PetscInt r = 0; flg && r < nr; r++) 4317 for (PetscInt k = r + 1; flg && k < nr; k++) 4318 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 4319 PetscCall(PetscFree2(m, M)); 4320 PetscCall(PetscFree2(rows, cols)); 4321 if (!flg) { 4322 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 4323 PetscFunctionReturn(PETSC_SUCCESS); 4324 } 4325 } 4326 4327 for (PetscInt r = 0; r < nr; r++) { 4328 for (PetscInt c = 0; c < nc; c++) { 4329 Mat sub = mats[r][c]; 4330 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 4331 4332 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 4333 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 4334 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 4335 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 4336 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 4337 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 4338 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 4339 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 4340 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 4341 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4342 if (ftype == MAT_FACTOR_CHOLESKY) { 4343 if (r == c) { 4344 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 4345 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4346 flg = PETSC_FALSE; 4347 } 4348 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4349 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4350 flg = PETSC_FALSE; 4351 } 4352 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4353 PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not 
supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 4354 flg = PETSC_FALSE; 4355 } 4356 } 4357 } 4358 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 4359 4360 /* Create the factorization matrix */ 4361 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4362 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4363 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4364 PetscCall(MatSetUp(B)); 4365 4366 PetscCall(PetscNew(&mumps)); 4367 4368 B->ops->view = MatView_MUMPS; 4369 B->ops->getinfo = MatGetInfo_MUMPS; 4370 4371 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4372 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4373 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4374 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4375 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4376 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4377 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4378 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4379 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4380 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4381 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4382 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4386 4387 if (ftype == MAT_FACTOR_LU) { 4388 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4389 B->factortype = MAT_FACTOR_LU; 4390 mumps->sym = 0; 4391 } else { 4392 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4393 B->factortype = MAT_FACTOR_CHOLESKY; 4394 #if defined(PETSC_USE_COMPLEX) 4395 mumps->sym = 2; 4396 #else 4397 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4398 else mumps->sym = 2; 4399 #endif 4400 } 4401 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 4402 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 4403 4404 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4405 if (size == 1) { 4406 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4407 B->canuseordering = PETSC_TRUE; 4408 } 4409 4410 /* set solvertype */ 4411 PetscCall(PetscFree(B->solvertype)); 4412 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4413 B->ops->destroy = MatDestroy_MUMPS; 4414 B->data = (void *)mumps; 4415 4416 *F = B; 4417 mumps->id.job = JOB_NULL; 4418 mumps->ICNTL_pre = NULL; 4419 mumps->CNTL_pre = NULL; 4420 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4421 PetscFunctionReturn(PETSC_SUCCESS); 4422 } 4423 4424 
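/*
   For orientation, the factor objects produced by the MatGetFactor_*_mumps() routines above are normally
   driven either through KSP/PC or directly through the MatFactor API. Below is a minimal sketch of the
   direct path; it is kept in a comment because it is illustrative only and not compiled with this file,
   and it assumes an assembled Mat A and Vecs b, x already exist in the caller.

     Mat           F;
     IS            row, col;
     MatFactorInfo info;
     PetscInt      infog1;

     PetscCall(MatGetFactor(A, MATSOLVERMUMPS, MAT_FACTOR_LU, &F));
     PetscCall(MatMumpsSetIcntl(F, 24, 1));                         // optional: enable null pivot detection
     PetscCall(MatGetOrdering(A, MATORDERINGEXTERNAL, &row, &col)); // let MUMPS compute the ordering
     PetscCall(MatFactorInfoInitialize(&info));
     PetscCall(MatLUFactorSymbolic(F, A, row, col, &info));         // analysis, cf. JOB_FACTSYMBOLIC
     PetscCall(MatLUFactorNumeric(F, A, &info));                    // factorization, cf. JOB_FACTNUMERIC
     PetscCall(MatSolve(F, b, x));                                  // solve, cf. JOB_SOLVE
     PetscCall(MatMumpsGetInfog(F, 1, &infog1));                    // INFOG(1) < 0 would indicate a MUMPS error
     PetscCall(ISDestroy(&row));
     PetscCall(ISDestroy(&col));
     PetscCall(MatDestroy(&F));
*/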
PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 4425 { 4426 PetscFunctionBegin; 4427 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4428 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4429 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4430 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4431 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4432 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4433 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4434 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4435 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4436 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4437 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 4438 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4439 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4440 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4441 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4442 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4443 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4444 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 4445 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 4446 PetscFunctionReturn(PETSC_SUCCESS); 4447 } 4448
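/*
   The registrations above are what allow -pc_type lu (or cholesky) -pc_factor_mat_solver_type mumps to reach
   this interface. A sketch of the equivalent programmatic KSP/PC usage follows, again in a comment and
   illustrative only; it assumes an assembled Mat A and Vecs b, x, and afterwards queries INFOG(1) as
   described in the MATSOLVERMUMPS manual page above.

     KSP      ksp;
     PC       pc;
     Mat      F;
     PetscInt ival;

     PetscCall(KSPCreate(PetscObjectComm((PetscObject)A), &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPSetType(ksp, KSPPREONLY));                  // a single application of the direct solver
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCLU));                          // or PCCHOLESKY for symmetric problems
     PetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS));
     PetscCall(KSPSetFromOptions(ksp));                       // picks up -mat_mumps_icntl_xx / -mat_mumps_cntl_xx
     PetscCall(KSPSolve(ksp, b, x));
     PetscCall(PCFactorGetMatrix(pc, &F));                    // access the factored Mat handled by this file
     PetscCall(MatMumpsGetInfog(F, 1, &ival));                // MUMPS INFOG(1); negative means an error occurred
     PetscCall(KSPDestroy(&ksp));
*/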