1 /* 2 Provides an interface to the MUMPS sparse solver 3 */ 4 #include <petscpkg_version.h> 5 #include <petscsf.h> 6 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 7 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h> 8 #include <../src/mat/impls/sell/mpi/mpisell.h> 9 #include <petsc/private/vecimpl.h> 10 11 #define MUMPS_MANUALS "(see users manual https://mumps-solver.org/index.php?page=doc \"Error and warning diagnostics\")" 12 13 EXTERN_C_BEGIN 14 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 15 #include <cmumps_c.h> 16 #include <zmumps_c.h> 17 #include <smumps_c.h> 18 #include <dmumps_c.h> 19 #else 20 #if defined(PETSC_USE_COMPLEX) 21 #if defined(PETSC_USE_REAL_SINGLE) 22 #include <cmumps_c.h> 23 #define MUMPS_c cmumps_c 24 #define MumpsScalar CMUMPS_COMPLEX 25 #else 26 #include <zmumps_c.h> 27 #define MUMPS_c zmumps_c 28 #define MumpsScalar ZMUMPS_COMPLEX 29 #endif 30 #else 31 #if defined(PETSC_USE_REAL_SINGLE) 32 #include <smumps_c.h> 33 #define MUMPS_c smumps_c 34 #define MumpsScalar SMUMPS_REAL 35 #else 36 #include <dmumps_c.h> 37 #define MUMPS_c dmumps_c 38 #define MumpsScalar DMUMPS_REAL 39 #endif 40 #endif 41 #endif 42 #if defined(PETSC_USE_COMPLEX) 43 #if defined(PETSC_USE_REAL_SINGLE) 44 #define MUMPS_STRUC_C CMUMPS_STRUC_C 45 #else 46 #define MUMPS_STRUC_C ZMUMPS_STRUC_C 47 #endif 48 #else 49 #if defined(PETSC_USE_REAL_SINGLE) 50 #define MUMPS_STRUC_C SMUMPS_STRUC_C 51 #else 52 #define MUMPS_STRUC_C DMUMPS_STRUC_C 53 #endif 54 #endif 55 EXTERN_C_END 56 57 #define JOB_INIT -1 58 #define JOB_NULL 0 59 #define JOB_FACTSYMBOLIC 1 60 #define JOB_FACTNUMERIC 2 61 #define JOB_SOLVE 3 62 #define JOB_END -2 63 64 /* MUMPS uses MUMPS_INT for nonzero indices such as irn/jcn, irn_loc/jcn_loc and uses int64_t for 65 number of nonzeros such as nnz, nnz_loc. We typedef MUMPS_INT to PetscMUMPSInt to follow the 66 naming convention in PetscMPIInt, PetscBLASInt etc. 67 */ 68 typedef MUMPS_INT PetscMUMPSInt; 69 70 #if PETSC_PKG_MUMPS_VERSION_GE(5, 3, 0) 71 #if defined(MUMPS_INTSIZE64) /* MUMPS_INTSIZE64 is in MUMPS headers if it is built in full 64-bit mode, therefore the macro is more reliable */ 72 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 73 #endif 74 #else 75 #if defined(INTSIZE64) /* INTSIZE64 is a command line macro one used to build MUMPS in full 64-bit mode */ 76 #error "PETSc has not been tested with full 64-bit MUMPS and we choose to error out" 77 #endif 78 #endif 79 80 #define MPIU_MUMPSINT MPI_INT 81 #define PETSC_MUMPS_INT_MAX 2147483647 82 #define PETSC_MUMPS_INT_MIN -2147483648 83 84 /* Cast PetscInt to PetscMUMPSInt. Usually there is no overflow since <a> is row/col indices or some small integers*/ 85 static inline PetscErrorCode PetscMUMPSIntCast(PetscCount a, PetscMUMPSInt *b) 86 { 87 PetscFunctionBegin; 88 #if PetscDefined(USE_64BIT_INDICES) 89 PetscAssert(a <= PETSC_MUMPS_INT_MAX && a >= PETSC_MUMPS_INT_MIN, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 90 #endif 91 *b = (PetscMUMPSInt)a; 92 PetscFunctionReturn(PETSC_SUCCESS); 93 } 94 95 /* Put these utility routines here since they are only used in this file */ 96 static inline PetscErrorCode PetscOptionsMUMPSInt_Private(PetscOptionItems PetscOptionsObject, const char opt[], const char text[], const char man[], PetscMUMPSInt currentvalue, PetscMUMPSInt *value, PetscBool *set, PetscMUMPSInt lb, PetscMUMPSInt ub) 97 { 98 PetscInt myval; 99 PetscBool myset; 100 101 PetscFunctionBegin; 102 /* PetscInt's size should be always >= PetscMUMPSInt's. It is safe to call PetscOptionsInt_Private to read a PetscMUMPSInt */ 103 PetscCall(PetscOptionsInt_Private(PetscOptionsObject, opt, text, man, (PetscInt)currentvalue, &myval, &myset, lb, ub)); 104 if (myset) PetscCall(PetscMUMPSIntCast(myval, value)); 105 if (set) *set = myset; 106 PetscFunctionReturn(PETSC_SUCCESS); 107 } 108 #define PetscOptionsMUMPSInt(a, b, c, d, e, f) PetscOptionsMUMPSInt_Private(PetscOptionsObject, a, b, c, d, e, f, PETSC_MUMPS_INT_MIN, PETSC_MUMPS_INT_MAX) 109 110 // An abstract type for specific MUMPS types {S,D,C,Z}MUMPS_STRUC_C. 111 // 112 // With the abstract (outer) type, we can write shared code. We call MUMPS through a type-to-be-determined inner field within the abstract type. 113 // Before/after calling MUMPS, we need to copy in/out fields between the outer and the inner, which seems expensive. But note that the large fixed size 114 // arrays within the types are directly linked. At the end, we only need to copy ~20 intergers/pointers, which is doable. See PreMumpsCall()/PostMumpsCall(). 115 // 116 // Not all fields in the specific types are exposed in the abstract type. We only need those used by the PETSc/MUMPS interface. 117 // Notably, DMUMPS_COMPLEX* and DMUMPS_REAL* fields are now declared as void *. Their type will be determined by the the actual precision to be used. 118 // Also note that we added some *_len fields not in specific types to track sizes of those MumpsScalar buffers. 119 typedef struct { 120 PetscPrecision precision; // precision used by MUMPS 121 void *internal_id; // the data structure passed to MUMPS, whose actual type {S,D,C,Z}MUMPS_STRUC_C is to be decided by precision and PETSc's use of complex 122 123 // aliased fields from internal_id, so that we can use XMUMPS_STRUC_C to write shared code across different precisions. 124 MUMPS_INT sym, par, job; 125 MUMPS_INT comm_fortran; /* Fortran communicator */ 126 MUMPS_INT *icntl; 127 void *cntl; // MumpsReal, fixed size array 128 MUMPS_INT n; 129 MUMPS_INT nblk; 130 131 /* Assembled entry */ 132 MUMPS_INT8 nnz; 133 MUMPS_INT *irn; 134 MUMPS_INT *jcn; 135 void *a; // MumpsScalar, centralized input 136 PetscCount a_len; 137 138 /* Distributed entry */ 139 MUMPS_INT8 nnz_loc; 140 MUMPS_INT *irn_loc; 141 MUMPS_INT *jcn_loc; 142 void *a_loc; // MumpsScalar, distributed input 143 PetscCount a_loc_len; 144 145 /* Matrix by blocks */ 146 MUMPS_INT *blkptr; 147 MUMPS_INT *blkvar; 148 149 /* Ordering, if given by user */ 150 MUMPS_INT *perm_in; 151 152 /* RHS, solution, ouptput data and statistics */ 153 void *rhs, *redrhs, *rhs_sparse, *sol_loc, *rhs_loc; // MumpsScalar buffers 154 PetscCount rhs_len, redrhs_len, rhs_sparse_len, sol_loc_len, rhs_loc_len; // length of buffers (in MumpsScalar) IF allocated in a different precision than PetscScalar 155 156 MUMPS_INT *irhs_sparse, *irhs_ptr, *isol_loc, *irhs_loc; 157 MUMPS_INT nrhs, lrhs, lredrhs, nz_rhs, lsol_loc, nloc_rhs, lrhs_loc; 158 // MUMPS_INT nsol_loc; // introduced in MUMPS-5.7, but PETSc doesn't use it; would cause compile errors with the widely used 5.6. If you add it, must also update PreMumpsCall() and guard this with #if PETSC_PKG_MUMPS_VERSION_GE(5, 7, 0) 159 MUMPS_INT schur_lld; 160 MUMPS_INT *info, *infog; // fixed size array 161 void *rinfo, *rinfog; // MumpsReal, fixed size array 162 163 /* Null space */ 164 MUMPS_INT *pivnul_list; // allocated by MUMPS! 165 MUMPS_INT *mapping; // allocated by MUMPS! 166 167 /* Schur */ 168 MUMPS_INT size_schur; 169 MUMPS_INT *listvar_schur; 170 void *schur; // MumpsScalar 171 PetscCount schur_len; 172 173 /* For out-of-core */ 174 char *ooc_tmpdir; // fixed size array 175 char *ooc_prefix; // fixed size array 176 } XMUMPS_STRUC_C; 177 178 // Note: fixed-size arrays are allocated by MUMPS; redirect them to the outer struct 179 #define AllocatInternalID(MUMPS_STRUC_T, outer) \ 180 do { \ 181 MUMPS_STRUC_T *inner; \ 182 PetscCall(PetscNew(&inner)); \ 183 outer->icntl = inner->icntl; \ 184 outer->cntl = inner->cntl; \ 185 outer->info = inner->info; \ 186 outer->infog = inner->infog; \ 187 outer->rinfo = inner->rinfo; \ 188 outer->rinfog = inner->rinfog; \ 189 outer->ooc_tmpdir = inner->ooc_tmpdir; \ 190 outer->ooc_prefix = inner->ooc_prefix; \ 191 /* the three field should never change after init */ \ 192 inner->comm_fortran = outer->comm_fortran; \ 193 inner->par = outer->par; \ 194 inner->sym = outer->sym; \ 195 outer->internal_id = inner; \ 196 } while (0) 197 198 // Allocate the internal [SDCZ]MUMPS_STRUC_C ID data structure in the given <precision>, and link fields of the outer and the inner 199 static inline PetscErrorCode MatMumpsAllocateInternalID(XMUMPS_STRUC_C *outer, PetscPrecision precision) 200 { 201 PetscFunctionBegin; 202 outer->precision = precision; 203 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 204 #if defined(PETSC_USE_COMPLEX) 205 if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(CMUMPS_STRUC_C, outer); 206 else AllocatInternalID(ZMUMPS_STRUC_C, outer); 207 #else 208 if (precision == PETSC_PRECISION_SINGLE) AllocatInternalID(SMUMPS_STRUC_C, outer); 209 else AllocatInternalID(DMUMPS_STRUC_C, outer); 210 #endif 211 #else 212 AllocatInternalID(MUMPS_STRUC_C, outer); 213 #endif 214 PetscFunctionReturn(PETSC_SUCCESS); 215 } 216 217 #define FreeInternalIDFields(MUMPS_STRUC_T, outer) \ 218 do { \ 219 MUMPS_STRUC_T *inner = (MUMPS_STRUC_T *)(outer)->internal_id; \ 220 PetscCall(PetscFree(inner->a)); \ 221 PetscCall(PetscFree(inner->a_loc)); \ 222 PetscCall(PetscFree(inner->redrhs)); \ 223 PetscCall(PetscFree(inner->rhs)); \ 224 PetscCall(PetscFree(inner->rhs_sparse)); \ 225 PetscCall(PetscFree(inner->rhs_loc)); \ 226 PetscCall(PetscFree(inner->sol_loc)); \ 227 PetscCall(PetscFree(inner->schur)); \ 228 } while (0) 229 230 static inline PetscErrorCode MatMumpsFreeInternalID(XMUMPS_STRUC_C *outer) 231 { 232 PetscFunctionBegin; 233 if (outer->internal_id) { // sometimes, the inner is never created before we destroy the outer 234 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 235 const PetscPrecision mumps_precision = outer->precision; 236 if (mumps_precision != PETSC_SCALAR_PRECISION) { // Free internal buffers if we used mixed precision 237 #if defined(PETSC_USE_COMPLEX) 238 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(CMUMPS_STRUC_C, outer); 239 else FreeInternalIDFields(ZMUMPS_STRUC_C, outer); 240 #else 241 if (mumps_precision == PETSC_PRECISION_SINGLE) FreeInternalIDFields(SMUMPS_STRUC_C, outer); 242 else FreeInternalIDFields(DMUMPS_STRUC_C, outer); 243 #endif 244 } 245 #endif 246 PetscCall(PetscFree(outer->internal_id)); 247 } 248 PetscFunctionReturn(PETSC_SUCCESS); 249 } 250 251 // Make a companion MumpsScalar array (with a given PetscScalar array), to hold at least <n> MumpsScalars in the given <precision> and return the address at <ma>. 252 // <convert> indicates if we need to convert PetscScalars to MumpsScalars after allocating the MumpsScalar array. 253 // (For bravity, we use <ma> for array address and <m> for its length in MumpsScalar, though in code they should be <*ma> and <*m>) 254 // If <ma> already points to a buffer/array, on input <m> should be its length. Note the buffer might be freed if it is not big enough for this request. 255 // 256 // The returned array is a companion, so how it is created depends on if PetscScalar and MumpsScalar are the same. 257 // 1) If they are different, a separate array will be made and its length and address will be provided at <m> and <ma> on output. 258 // 2) Otherwise, <pa> will be returned in <ma>, and <m> will be zero on output. 259 // 260 // 261 // Input parameters: 262 // + convert - whether to do PetscScalar to MumpsScalar conversion 263 // . n - length of the PetscScalar array 264 // . pa - [n]], points to the PetscScalar array 265 // . precision - precision of MumpsScalar 266 // . m - on input, length of an existing MumpsScalar array <ma> if any, otherwise *m is just zero. 267 // - ma - on input, an existing MumpsScalar array if any. 268 // 269 // Output parameters: 270 // + m - length of the MumpsScalar buffer at <ma> if MumpsScalar is different from PetscScalar, otherwise 0 271 // . ma - the MumpsScalar array, which could be an alias of <pa> when the two types are the same. 272 // 273 // Note: 274 // New memory, if allocated, is done via PetscMalloc1(), and is owned by caller. 275 static PetscErrorCode MatMumpsMakeMumpsScalarArray(PetscBool convert, PetscCount n, const PetscScalar *pa, PetscPrecision precision, PetscCount *m, void **ma) 276 { 277 PetscFunctionBegin; 278 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 279 const PetscPrecision mumps_precision = precision; 280 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unsupported precicison (%d). Must be single or double", (int)precision); 281 #if defined(PETSC_USE_COMPLEX) 282 if (mumps_precision != PETSC_SCALAR_PRECISION) { 283 if (mumps_precision == PETSC_PRECISION_SINGLE) { 284 if (*m < n) { 285 PetscCall(PetscFree(*ma)); 286 PetscCall(PetscMalloc1(n, (CMUMPS_COMPLEX **)ma)); 287 *m = n; 288 } 289 if (convert) { 290 CMUMPS_COMPLEX *b = *(CMUMPS_COMPLEX **)ma; 291 for (PetscCount i = 0; i < n; i++) { 292 b[i].r = PetscRealPart(pa[i]); 293 b[i].i = PetscImaginaryPart(pa[i]); 294 } 295 } 296 } else { 297 if (*m < n) { 298 PetscCall(PetscFree(*ma)); 299 PetscCall(PetscMalloc1(n, (ZMUMPS_COMPLEX **)ma)); 300 *m = n; 301 } 302 if (convert) { 303 ZMUMPS_COMPLEX *b = *(ZMUMPS_COMPLEX **)ma; 304 for (PetscCount i = 0; i < n; i++) { 305 b[i].r = PetscRealPart(pa[i]); 306 b[i].i = PetscImaginaryPart(pa[i]); 307 } 308 } 309 } 310 } 311 #else 312 if (mumps_precision != PETSC_SCALAR_PRECISION) { 313 if (mumps_precision == PETSC_PRECISION_SINGLE) { 314 if (*m < n) { 315 PetscCall(PetscFree(*ma)); 316 PetscCall(PetscMalloc1(n, (SMUMPS_REAL **)ma)); 317 *m = n; 318 } 319 if (convert) { 320 SMUMPS_REAL *b = *(SMUMPS_REAL **)ma; 321 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 322 } 323 } else { 324 if (*m < n) { 325 PetscCall(PetscFree(*ma)); 326 PetscCall(PetscMalloc1(n, (DMUMPS_REAL **)ma)); 327 *m = n; 328 } 329 if (convert) { 330 DMUMPS_REAL *b = *(DMUMPS_REAL **)ma; 331 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 332 } 333 } 334 } 335 #endif 336 else 337 #endif 338 { 339 if (*m != 0) PetscCall(PetscFree(*ma)); // free existing buffer if any 340 *ma = (void *)pa; // same precision, make them alias 341 *m = 0; 342 } 343 PetscFunctionReturn(PETSC_SUCCESS); 344 } 345 346 // Cast a MumpsScalar array <ma[n]> in <mumps_precision> to a PetscScalar array at address <pa>. 347 // 348 // 1) If the two types are different, cast array elements. 349 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 350 static PetscErrorCode MatMumpsCastMumpsScalarArray(PetscCount n, PetscPrecision mumps_precision, const void *ma, PetscScalar *pa) 351 { 352 PetscFunctionBegin; 353 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 354 if (mumps_precision != PETSC_SCALAR_PRECISION) { 355 #if defined(PETSC_USE_COMPLEX) 356 if (mumps_precision == PETSC_PRECISION_SINGLE) { 357 PetscReal *a = (PetscReal *)pa; 358 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 359 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 360 } else { 361 PetscReal *a = (PetscReal *)pa; 362 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 363 for (PetscCount i = 0; i < 2 * n; i++) a[i] = b[i]; 364 } 365 #else 366 if (mumps_precision == PETSC_PRECISION_SINGLE) { 367 const SMUMPS_REAL *b = (const SMUMPS_REAL *)ma; 368 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 369 } else { 370 const DMUMPS_REAL *b = (const DMUMPS_REAL *)ma; 371 for (PetscCount i = 0; i < n; i++) pa[i] = b[i]; 372 } 373 #endif 374 } else 375 #endif 376 PetscCall(PetscArraycpy((PetscScalar *)pa, (PetscScalar *)ma, n)); 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 // Cast a PetscScalar array <pa[n]> to a MumpsScalar array in the given <mumps_precision> at address <ma>. 381 // 382 // 1) If the two types are different, cast array elements. 383 // 2) Otherwise, this works as a memcpy; of course, if the two addresses are equal, it is a no-op. 384 static PetscErrorCode MatMumpsCastPetscScalarArray(PetscCount n, const PetscScalar *pa, PetscPrecision mumps_precision, const void *ma) 385 { 386 PetscFunctionBegin; 387 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 388 if (mumps_precision != PETSC_SCALAR_PRECISION) { 389 #if defined(PETSC_USE_COMPLEX) 390 if (mumps_precision == PETSC_PRECISION_SINGLE) { 391 CMUMPS_COMPLEX *b = (CMUMPS_COMPLEX *)ma; 392 for (PetscCount i = 0; i < n; i++) { 393 b[i].r = PetscRealPart(pa[i]); 394 b[i].i = PetscImaginaryPart(pa[i]); 395 } 396 } else { 397 ZMUMPS_COMPLEX *b = (ZMUMPS_COMPLEX *)ma; 398 for (PetscCount i = 0; i < n; i++) { 399 b[i].r = PetscRealPart(pa[i]); 400 b[i].i = PetscImaginaryPart(pa[i]); 401 } 402 } 403 #else 404 if (mumps_precision == PETSC_PRECISION_SINGLE) { 405 SMUMPS_REAL *b = (SMUMPS_REAL *)ma; 406 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 407 } else { 408 DMUMPS_REAL *b = (DMUMPS_REAL *)ma; 409 for (PetscCount i = 0; i < n; i++) b[i] = pa[i]; 410 } 411 #endif 412 } else 413 #endif 414 PetscCall(PetscArraycpy((PetscScalar *)ma, (PetscScalar *)pa, n)); 415 PetscFunctionReturn(PETSC_SUCCESS); 416 } 417 418 static inline MPI_Datatype MPIU_MUMPSREAL(const XMUMPS_STRUC_C *id) 419 { 420 return id->precision == PETSC_PRECISION_DOUBLE ? MPI_DOUBLE : MPI_FLOAT; 421 } 422 423 #define PreMumpsCall(inner, outer, mumpsscalar) \ 424 do { \ 425 inner->job = outer->job; \ 426 inner->n = outer->n; \ 427 inner->nblk = outer->nblk; \ 428 inner->nnz = outer->nnz; \ 429 inner->irn = outer->irn; \ 430 inner->jcn = outer->jcn; \ 431 inner->a = (mumpsscalar *)outer->a; \ 432 inner->nnz_loc = outer->nnz_loc; \ 433 inner->irn_loc = outer->irn_loc; \ 434 inner->jcn_loc = outer->jcn_loc; \ 435 inner->a_loc = (mumpsscalar *)outer->a_loc; \ 436 inner->blkptr = outer->blkptr; \ 437 inner->blkvar = outer->blkvar; \ 438 inner->perm_in = outer->perm_in; \ 439 inner->rhs = (mumpsscalar *)outer->rhs; \ 440 inner->redrhs = (mumpsscalar *)outer->redrhs; \ 441 inner->rhs_sparse = (mumpsscalar *)outer->rhs_sparse; \ 442 inner->sol_loc = (mumpsscalar *)outer->sol_loc; \ 443 inner->rhs_loc = (mumpsscalar *)outer->rhs_loc; \ 444 inner->irhs_sparse = outer->irhs_sparse; \ 445 inner->irhs_ptr = outer->irhs_ptr; \ 446 inner->isol_loc = outer->isol_loc; \ 447 inner->irhs_loc = outer->irhs_loc; \ 448 inner->nrhs = outer->nrhs; \ 449 inner->lrhs = outer->lrhs; \ 450 inner->lredrhs = outer->lredrhs; \ 451 inner->nz_rhs = outer->nz_rhs; \ 452 inner->lsol_loc = outer->lsol_loc; \ 453 inner->nloc_rhs = outer->nloc_rhs; \ 454 inner->lrhs_loc = outer->lrhs_loc; \ 455 inner->schur_lld = outer->schur_lld; \ 456 inner->size_schur = outer->size_schur; \ 457 inner->listvar_schur = outer->listvar_schur; \ 458 inner->schur = (mumpsscalar *)outer->schur; \ 459 } while (0) 460 461 #define PostMumpsCall(inner, outer) \ 462 do { \ 463 outer->pivnul_list = inner->pivnul_list; \ 464 outer->mapping = inner->mapping; \ 465 } while (0) 466 467 // Entry for PETSc to call mumps 468 static inline PetscErrorCode PetscCallMumps_Private(XMUMPS_STRUC_C *outer) 469 { 470 PetscFunctionBegin; 471 #if defined(PETSC_HAVE_MUMPS_MIXED_PRECISION) 472 #if defined(PETSC_USE_COMPLEX) 473 if (outer->precision == PETSC_PRECISION_SINGLE) { 474 CMUMPS_STRUC_C *inner = (CMUMPS_STRUC_C *)outer->internal_id; 475 PreMumpsCall(inner, outer, CMUMPS_COMPLEX); 476 PetscStackCallExternalVoid("cmumps_c", cmumps_c(inner)); 477 PostMumpsCall(inner, outer); 478 } else { 479 ZMUMPS_STRUC_C *inner = (ZMUMPS_STRUC_C *)outer->internal_id; 480 PreMumpsCall(inner, outer, ZMUMPS_COMPLEX); 481 PetscStackCallExternalVoid("zmumps_c", zmumps_c(inner)); 482 PostMumpsCall(inner, outer); 483 } 484 #else 485 if (outer->precision == PETSC_PRECISION_SINGLE) { 486 SMUMPS_STRUC_C *inner = (SMUMPS_STRUC_C *)outer->internal_id; 487 PreMumpsCall(inner, outer, SMUMPS_REAL); 488 PetscStackCallExternalVoid("smumps_c", smumps_c(inner)); 489 PostMumpsCall(inner, outer); 490 } else { 491 DMUMPS_STRUC_C *inner = (DMUMPS_STRUC_C *)outer->internal_id; 492 PreMumpsCall(inner, outer, DMUMPS_REAL); 493 PetscStackCallExternalVoid("dmumps_c", dmumps_c(inner)); 494 PostMumpsCall(inner, outer); 495 } 496 #endif 497 #else 498 MUMPS_STRUC_C *inner = (MUMPS_STRUC_C *)outer->internal_id; 499 PreMumpsCall(inner, outer, MumpsScalar); 500 PetscStackCallExternalVoid(PetscStringize(MUMPS_c), MUMPS_c(inner)); 501 PostMumpsCall(inner, outer); 502 #endif 503 PetscFunctionReturn(PETSC_SUCCESS); 504 } 505 506 /* macros s.t. indices match MUMPS documentation */ 507 #define ICNTL(I) icntl[(I) - 1] 508 #define INFOG(I) infog[(I) - 1] 509 #define INFO(I) info[(I) - 1] 510 511 // Get a value from a MumpsScalar array, which is the <F> field in the struct of MUMPS_STRUC_C. The value is convertible to PetscScalar. Note no minus 1 on I! 512 #if defined(PETSC_USE_COMPLEX) 513 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((CMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((CMUMPS_COMPLEX *)(ID).F)[I].i : ((ZMUMPS_COMPLEX *)(ID).F)[I].r + PETSC_i * ((ZMUMPS_COMPLEX *)(ID).F)[I].i) 514 #else 515 #define ID_FIELD_GET(ID, F, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).F)[I] : ((double *)(ID).F)[I]) 516 #endif 517 518 // Get a value from MumpsReal arrays. The value is convertible to PetscReal. 519 #define ID_CNTL_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).cntl)[(I) - 1] : ((double *)(ID).cntl)[(I) - 1]) 520 #define ID_RINFOG_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfog)[(I) - 1] : ((double *)(ID).rinfog)[(I) - 1]) 521 #define ID_RINFO_GET(ID, I) ((ID).precision == PETSC_PRECISION_SINGLE ? ((float *)(ID).rinfo)[(I) - 1] : ((double *)(ID).rinfo)[(I) - 1]) 522 523 // Set the I-th entry of the MumpsReal array id.cntl[] with a PetscReal <VAL> 524 #define ID_CNTL_SET(ID, I, VAL) \ 525 do { \ 526 if ((ID).precision == PETSC_PRECISION_SINGLE) ((float *)(ID).cntl)[(I) - 1] = (VAL); \ 527 else ((double *)(ID).cntl)[(I) - 1] = (VAL); \ 528 } while (0) 529 530 /* if using PETSc OpenMP support, we only call MUMPS on master ranks. Before/after the call, we change/restore CPUs the master ranks can run on */ 531 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 532 #define PetscMUMPS_c(mumps) \ 533 do { \ 534 if (mumps->use_petsc_omp_support) { \ 535 if (mumps->is_omp_master) { \ 536 PetscCall(PetscOmpCtrlOmpRegionOnMasterBegin(mumps->omp_ctrl)); \ 537 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 538 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 539 PetscCall(PetscFPTrapPop()); \ 540 PetscCall(PetscOmpCtrlOmpRegionOnMasterEnd(mumps->omp_ctrl)); \ 541 } \ 542 PetscCall(PetscOmpCtrlBarrier(mumps->omp_ctrl)); \ 543 /* Global info is same on all processes so we Bcast it within omp_comm. Local info is specific \ 544 to processes, so we only Bcast info[1], an error code and leave others (since they do not have \ 545 an easy translation between omp_comm and petsc_comm). See MUMPS-5.1.2 manual p82. \ 546 omp_comm is a small shared memory communicator, hence doing multiple Bcast as shown below is OK. \ 547 */ \ 548 MUMPS_STRUC_C tmp; /* All MUMPS_STRUC_C types have same lengths on these info arrays */ \ 549 PetscCallMPI(MPI_Bcast(mumps->id.infog, PETSC_STATIC_ARRAY_LENGTH(tmp.infog), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 550 PetscCallMPI(MPI_Bcast(mumps->id.info, PETSC_STATIC_ARRAY_LENGTH(tmp.info), MPIU_MUMPSINT, 0, mumps->omp_comm)); \ 551 PetscCallMPI(MPI_Bcast(mumps->id.rinfog, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfog), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 552 PetscCallMPI(MPI_Bcast(mumps->id.rinfo, PETSC_STATIC_ARRAY_LENGTH(tmp.rinfo), MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); \ 553 } else { \ 554 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 555 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 556 PetscCall(PetscFPTrapPop()); \ 557 } \ 558 } while (0) 559 #else 560 #define PetscMUMPS_c(mumps) \ 561 do { \ 562 PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF)); \ 563 PetscCall(PetscCallMumps_Private(&mumps->id)); \ 564 PetscCall(PetscFPTrapPop()); \ 565 } while (0) 566 #endif 567 568 typedef struct Mat_MUMPS Mat_MUMPS; 569 struct Mat_MUMPS { 570 XMUMPS_STRUC_C id; 571 572 MatStructure matstruc; 573 PetscMPIInt myid, petsc_size; 574 PetscMUMPSInt *irn, *jcn; /* the (i,j,v) triplets passed to mumps. */ 575 PetscScalar *val, *val_alloc; /* For some matrices, we can directly access their data array without a buffer. For others, we need a buffer. So comes val_alloc. */ 576 PetscCount nnz; /* number of nonzeros. The type is called selective 64-bit in mumps */ 577 PetscMUMPSInt sym; 578 MPI_Comm mumps_comm; 579 PetscMUMPSInt *ICNTL_pre; 580 PetscReal *CNTL_pre; 581 PetscMUMPSInt ICNTL9_pre; /* check if ICNTL(9) is changed from previous MatSolve */ 582 VecScatter scat_rhs, scat_sol; /* used by MatSolve() */ 583 PetscMUMPSInt ICNTL20; /* use centralized (0) or distributed (10) dense RHS */ 584 PetscMUMPSInt ICNTL26; 585 PetscMUMPSInt lrhs_loc, nloc_rhs, *irhs_loc; 586 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 587 PetscInt *rhs_nrow, max_nrhs; 588 PetscMPIInt *rhs_recvcounts, *rhs_disps; 589 PetscScalar *rhs_loc, *rhs_recvbuf; 590 #endif 591 Vec b_seq, x_seq; 592 PetscInt ninfo, *info; /* which INFO to display */ 593 PetscInt sizeredrhs; 594 PetscScalar *schur_sol; 595 PetscInt schur_sizesol; 596 PetscScalar *redrhs; // buffer in PetscScalar in case MumpsScalar is in a different precision 597 PetscMUMPSInt *ia_alloc, *ja_alloc; /* work arrays used for the CSR struct for sparse rhs */ 598 PetscCount cur_ilen, cur_jlen; /* current len of ia_alloc[], ja_alloc[] */ 599 PetscErrorCode (*ConvertToTriples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 600 601 /* Support for MATNEST */ 602 PetscErrorCode (**nest_convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *); 603 PetscCount *nest_vals_start; 604 PetscScalar *nest_vals; 605 606 /* stuff used by petsc/mumps OpenMP support*/ 607 PetscBool use_petsc_omp_support; 608 PetscOmpCtrl omp_ctrl; /* an OpenMP controller that blocked processes will release their CPU (MPI_Barrier does not have this guarantee) */ 609 MPI_Comm petsc_comm, omp_comm; /* petsc_comm is PETSc matrix's comm */ 610 PetscCount *recvcount; /* a collection of nnz on omp_master */ 611 PetscMPIInt tag, omp_comm_size; 612 PetscBool is_omp_master; /* is this rank the master of omp_comm */ 613 MPI_Request *reqs; 614 }; 615 616 /* Cast a 1-based CSR represented by (nrow, ia, ja) of type PetscInt to a CSR of type PetscMUMPSInt. 617 Here, nrow is number of rows, ia[] is row pointer and ja[] is column indices. 618 */ 619 static PetscErrorCode PetscMUMPSIntCSRCast(PETSC_UNUSED Mat_MUMPS *mumps, PetscInt nrow, PetscInt *ia, PetscInt *ja, PetscMUMPSInt **ia_mumps, PetscMUMPSInt **ja_mumps, PetscMUMPSInt *nnz_mumps) 620 { 621 PetscInt nnz = ia[nrow] - 1; /* mumps uses 1-based indices. Uses PetscInt instead of PetscCount since mumps only uses PetscMUMPSInt for rhs */ 622 623 PetscFunctionBegin; 624 #if defined(PETSC_USE_64BIT_INDICES) 625 { 626 PetscInt i; 627 if (nrow + 1 > mumps->cur_ilen) { /* realloc ia_alloc/ja_alloc to fit ia/ja */ 628 PetscCall(PetscFree(mumps->ia_alloc)); 629 PetscCall(PetscMalloc1(nrow + 1, &mumps->ia_alloc)); 630 mumps->cur_ilen = nrow + 1; 631 } 632 if (nnz > mumps->cur_jlen) { 633 PetscCall(PetscFree(mumps->ja_alloc)); 634 PetscCall(PetscMalloc1(nnz, &mumps->ja_alloc)); 635 mumps->cur_jlen = nnz; 636 } 637 for (i = 0; i < nrow + 1; i++) PetscCall(PetscMUMPSIntCast(ia[i], &mumps->ia_alloc[i])); 638 for (i = 0; i < nnz; i++) PetscCall(PetscMUMPSIntCast(ja[i], &mumps->ja_alloc[i])); 639 *ia_mumps = mumps->ia_alloc; 640 *ja_mumps = mumps->ja_alloc; 641 } 642 #else 643 *ia_mumps = ia; 644 *ja_mumps = ja; 645 #endif 646 PetscCall(PetscMUMPSIntCast(nnz, nnz_mumps)); 647 PetscFunctionReturn(PETSC_SUCCESS); 648 } 649 650 static PetscErrorCode MatMumpsResetSchur_Private(Mat_MUMPS *mumps) 651 { 652 PetscFunctionBegin; 653 PetscCall(PetscFree(mumps->id.listvar_schur)); 654 PetscCall(PetscFree(mumps->redrhs)); // if needed, id.redrhs will be freed in MatMumpsFreeInternalID() 655 PetscCall(PetscFree(mumps->schur_sol)); 656 mumps->id.size_schur = 0; 657 mumps->id.schur_lld = 0; 658 if (mumps->id.internal_id) mumps->id.ICNTL(19) = 0; // sometimes, the inner id is yet built 659 PetscFunctionReturn(PETSC_SUCCESS); 660 } 661 662 /* solve with rhs in mumps->id.redrhs and return in the same location */ 663 static PetscErrorCode MatMumpsSolveSchur_Private(Mat F) 664 { 665 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 666 Mat S, B, X; // solve S*X = B; all three matrices are dense 667 MatFactorSchurStatus schurstatus; 668 PetscInt sizesol; 669 const PetscScalar *xarray; 670 671 PetscFunctionBegin; 672 PetscCall(MatFactorFactorizeSchurComplement(F)); 673 PetscCall(MatFactorGetSchurComplement(F, &S, &schurstatus)); 674 PetscCall(MatMumpsCastMumpsScalarArray(mumps->sizeredrhs, mumps->id.precision, mumps->id.redrhs, mumps->redrhs)); 675 676 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &B)); 677 PetscCall(MatSetType(B, ((PetscObject)S)->type_name)); 678 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 679 PetscCall(MatBindToCPU(B, S->boundtocpu)); 680 #endif 681 switch (schurstatus) { 682 case MAT_FACTOR_SCHUR_FACTORED: 683 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->redrhs, &X)); 684 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 685 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 686 PetscCall(MatBindToCPU(X, S->boundtocpu)); 687 #endif 688 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 689 PetscCall(MatMatSolveTranspose(S, B, X)); 690 } else { 691 PetscCall(MatMatSolve(S, B, X)); 692 } 693 break; 694 case MAT_FACTOR_SCHUR_INVERTED: 695 sizesol = mumps->id.nrhs * mumps->id.size_schur; 696 if (!mumps->schur_sol || sizesol > mumps->schur_sizesol) { 697 PetscCall(PetscFree(mumps->schur_sol)); 698 PetscCall(PetscMalloc1(sizesol, &mumps->schur_sol)); 699 mumps->schur_sizesol = sizesol; 700 } 701 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, mumps->id.size_schur, mumps->id.nrhs, mumps->schur_sol, &X)); 702 PetscCall(MatSetType(X, ((PetscObject)S)->type_name)); 703 #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA) 704 PetscCall(MatBindToCPU(X, S->boundtocpu)); 705 #endif 706 PetscCall(MatProductCreateWithMat(S, B, NULL, X)); 707 if (!mumps->id.ICNTL(9)) { /* transpose solve */ 708 PetscCall(MatProductSetType(X, MATPRODUCT_AtB)); 709 } else { 710 PetscCall(MatProductSetType(X, MATPRODUCT_AB)); 711 } 712 PetscCall(MatProductSetFromOptions(X)); 713 PetscCall(MatProductSymbolic(X)); 714 PetscCall(MatProductNumeric(X)); 715 716 PetscCall(MatCopy(X, B, SAME_NONZERO_PATTERN)); 717 break; 718 default: 719 SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unhandled MatFactorSchurStatus %d", F->schur_status); 720 } 721 // MUST get the array from X (not B), though they share the same host array. We can only guarantee X has the correct data on device. 722 PetscCall(MatDenseGetArrayRead(X, &xarray)); // xarray should be mumps->redrhs, but using MatDenseGetArrayRead is safer with GPUs. 723 PetscCall(MatMumpsCastPetscScalarArray(mumps->sizeredrhs, xarray, mumps->id.precision, mumps->id.redrhs)); 724 PetscCall(MatDenseRestoreArrayRead(X, &xarray)); 725 PetscCall(MatFactorRestoreSchurComplement(F, &S, schurstatus)); 726 PetscCall(MatDestroy(&B)); 727 PetscCall(MatDestroy(&X)); 728 PetscFunctionReturn(PETSC_SUCCESS); 729 } 730 731 static PetscErrorCode MatMumpsHandleSchur_Private(Mat F, PetscBool expansion) 732 { 733 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 734 735 PetscFunctionBegin; 736 if (!mumps->id.ICNTL(19)) { /* do nothing when Schur complement has not been computed */ 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 if (!expansion) { /* prepare for the condensation step */ 740 PetscInt sizeredrhs = mumps->id.nrhs * mumps->id.size_schur; 741 /* allocate MUMPS internal array to store reduced right-hand sides */ 742 if (!mumps->id.redrhs || sizeredrhs > mumps->sizeredrhs) { 743 mumps->id.lredrhs = mumps->id.size_schur; 744 mumps->sizeredrhs = mumps->id.nrhs * mumps->id.lredrhs; 745 if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 746 PetscCall(PetscFree(mumps->redrhs)); 747 PetscCall(PetscMalloc1(mumps->sizeredrhs, &mumps->redrhs)); 748 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, mumps->sizeredrhs, mumps->redrhs, mumps->id.precision, &mumps->id.redrhs_len, &mumps->id.redrhs)); 749 } 750 } else { /* prepare for the expansion step */ 751 PetscCall(MatMumpsSolveSchur_Private(F)); /* solve Schur complement, put solution in id.redrhs (this has to be done by the MUMPS user, so basically us) */ 752 mumps->id.ICNTL(26) = 2; /* expansion phase */ 753 PetscMUMPS_c(mumps); 754 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 755 /* restore defaults */ 756 mumps->id.ICNTL(26) = -1; 757 /* free MUMPS internal array for redrhs if we have solved for multiple rhs in order to save memory space */ 758 if (mumps->id.nrhs > 1) { 759 if (mumps->id.redrhs_len) PetscCall(PetscFree(mumps->id.redrhs)); 760 PetscCall(PetscFree(mumps->redrhs)); 761 mumps->id.redrhs_len = 0; 762 mumps->id.lredrhs = 0; 763 mumps->sizeredrhs = 0; 764 } 765 } 766 PetscFunctionReturn(PETSC_SUCCESS); 767 } 768 769 /* 770 MatConvertToTriples_A_B - convert PETSc matrix to triples: row[nz], col[nz], val[nz] 771 772 input: 773 A - matrix in aij,baij or sbaij format 774 shift - 0: C style output triple; 1: Fortran style output triple. 775 reuse - MAT_INITIAL_MATRIX: spaces are allocated and values are set for the triple 776 MAT_REUSE_MATRIX: only the values in v array are updated 777 output: 778 nnz - dim of r, c, and v (number of local nonzero entries of A) 779 r, c, v - row and col index, matrix values (matrix triples) 780 781 The returned values r, c, and sometimes v are obtained in a single PetscMalloc(). Then in MatDestroy_MUMPS() it is 782 freed with PetscFree(mumps->irn); This is not ideal code, the fact that v is ONLY sometimes part of mumps->irn means 783 that the PetscMalloc() cannot easily be replaced with a PetscMalloc3(). 784 785 */ 786 787 static PetscErrorCode MatConvertToTriples_seqaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 788 { 789 const PetscScalar *av; 790 const PetscInt *ai, *aj, *ajj, M = A->rmap->n; 791 PetscCount nz, rnz, k; 792 PetscMUMPSInt *row, *col; 793 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 794 795 PetscFunctionBegin; 796 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 797 if (reuse == MAT_INITIAL_MATRIX) { 798 nz = aa->nz; 799 ai = aa->i; 800 aj = aa->j; 801 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 802 for (PetscCount i = k = 0; i < M; i++) { 803 rnz = ai[i + 1] - ai[i]; 804 ajj = aj + ai[i]; 805 for (PetscCount j = 0; j < rnz; j++) { 806 PetscCall(PetscMUMPSIntCast(i + shift, &row[k])); 807 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[k])); 808 k++; 809 } 810 } 811 mumps->val = (PetscScalar *)av; 812 mumps->irn = row; 813 mumps->jcn = col; 814 mumps->nnz = nz; 815 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, aa->nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqaij_seqaij(), so one needs to copy the memory */ 816 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 817 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 818 PetscFunctionReturn(PETSC_SUCCESS); 819 } 820 821 static PetscErrorCode MatConvertToTriples_seqsell_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 822 { 823 PetscCount nz, i, j, k, r; 824 Mat_SeqSELL *a = (Mat_SeqSELL *)A->data; 825 PetscMUMPSInt *row, *col; 826 827 PetscFunctionBegin; 828 nz = a->sliidx[a->totalslices]; 829 if (reuse == MAT_INITIAL_MATRIX) { 830 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 831 for (i = k = 0; i < a->totalslices; i++) { 832 for (j = a->sliidx[i], r = 0; j < a->sliidx[i + 1]; j++, r = ((r + 1) & 0x07)) PetscCall(PetscMUMPSIntCast(8 * i + r + shift, &row[k++])); 833 } 834 for (i = 0; i < nz; i++) PetscCall(PetscMUMPSIntCast(a->colidx[i] + shift, &col[i])); 835 mumps->irn = row; 836 mumps->jcn = col; 837 mumps->nnz = nz; 838 mumps->val = a->val; 839 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, a->val, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsell_seqaij(), so one needs to copy the memory */ 840 else mumps->val = a->val; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 841 PetscFunctionReturn(PETSC_SUCCESS); 842 } 843 844 static PetscErrorCode MatConvertToTriples_seqbaij_seqaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 845 { 846 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)A->data; 847 const PetscInt *ai, *aj, *ajj, bs2 = aa->bs2; 848 PetscCount M, nz = bs2 * aa->nz, idx = 0, rnz, i, j, k, m; 849 PetscInt bs; 850 PetscMUMPSInt *row, *col; 851 852 PetscFunctionBegin; 853 if (reuse == MAT_INITIAL_MATRIX) { 854 PetscCall(MatGetBlockSize(A, &bs)); 855 M = A->rmap->N / bs; 856 ai = aa->i; 857 aj = aa->j; 858 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 859 for (i = 0; i < M; i++) { 860 ajj = aj + ai[i]; 861 rnz = ai[i + 1] - ai[i]; 862 for (k = 0; k < rnz; k++) { 863 for (j = 0; j < bs; j++) { 864 for (m = 0; m < bs; m++) { 865 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[idx])); 866 PetscCall(PetscMUMPSIntCast(bs * ajj[k] + j + shift, &col[idx])); 867 idx++; 868 } 869 } 870 } 871 } 872 mumps->irn = row; 873 mumps->jcn = col; 874 mumps->nnz = nz; 875 mumps->val = aa->a; 876 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, aa->a, nz)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqbaij_seqaij(), so one needs to copy the memory */ 877 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 static PetscErrorCode MatConvertToTriples_seqsbaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 882 { 883 const PetscInt *ai, *aj, *ajj; 884 PetscInt bs; 885 PetscCount nz, rnz, i, j, k, m; 886 PetscMUMPSInt *row, *col; 887 PetscScalar *val; 888 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)A->data; 889 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 890 #if defined(PETSC_USE_COMPLEX) 891 PetscBool isset, hermitian; 892 #endif 893 894 PetscFunctionBegin; 895 #if defined(PETSC_USE_COMPLEX) 896 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 897 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 898 #endif 899 ai = aa->i; 900 aj = aa->j; 901 PetscCall(MatGetBlockSize(A, &bs)); 902 if (reuse == MAT_INITIAL_MATRIX) { 903 const PetscCount alloc_size = aa->nz * bs2; 904 905 PetscCall(PetscMalloc2(alloc_size, &row, alloc_size, &col)); 906 if (bs > 1) { 907 PetscCall(PetscMalloc1(alloc_size, &mumps->val_alloc)); 908 mumps->val = mumps->val_alloc; 909 } else { 910 mumps->val = aa->a; 911 } 912 mumps->irn = row; 913 mumps->jcn = col; 914 } else { 915 row = mumps->irn; 916 col = mumps->jcn; 917 } 918 val = mumps->val; 919 920 nz = 0; 921 if (bs > 1) { 922 for (i = 0; i < mbs; i++) { 923 rnz = ai[i + 1] - ai[i]; 924 ajj = aj + ai[i]; 925 for (j = 0; j < rnz; j++) { 926 for (k = 0; k < bs; k++) { 927 for (m = 0; m < bs; m++) { 928 if (ajj[j] > i || k >= m) { 929 if (reuse == MAT_INITIAL_MATRIX) { 930 PetscCall(PetscMUMPSIntCast(i * bs + m + shift, &row[nz])); 931 PetscCall(PetscMUMPSIntCast(ajj[j] * bs + k + shift, &col[nz])); 932 } 933 val[nz++] = aa->a[(ai[i] + j) * bs2 + m + k * bs]; 934 } 935 } 936 } 937 } 938 } 939 } else if (reuse == MAT_INITIAL_MATRIX) { 940 for (i = 0; i < mbs; i++) { 941 rnz = ai[i + 1] - ai[i]; 942 ajj = aj + ai[i]; 943 for (j = 0; j < rnz; j++) { 944 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 945 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 946 nz++; 947 } 948 } 949 PetscCheck(nz == aa->nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different numbers of nonzeros %" PetscCount_FMT " != %" PetscInt_FMT, nz, aa->nz); 950 } else if (mumps->nest_vals) 951 PetscCall(PetscArraycpy(mumps->val, aa->a, aa->nz)); /* bs == 1 and MAT_REUSE_MATRIX, MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_seqsbaij_seqsbaij(), so one needs to copy the memory */ 952 else mumps->val = aa->a; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 953 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = nz; 954 PetscFunctionReturn(PETSC_SUCCESS); 955 } 956 957 static PetscErrorCode MatConvertToTriples_seqaij_seqsbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 958 { 959 const PetscInt *ai, *aj, *ajj, *adiag, M = A->rmap->n; 960 PetscCount nz, rnz, i, j; 961 const PetscScalar *av, *v1; 962 PetscScalar *val; 963 PetscMUMPSInt *row, *col; 964 Mat_SeqAIJ *aa = (Mat_SeqAIJ *)A->data; 965 PetscBool diagDense; 966 #if defined(PETSC_USE_COMPLEX) 967 PetscBool hermitian, isset; 968 #endif 969 970 PetscFunctionBegin; 971 #if defined(PETSC_USE_COMPLEX) 972 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 973 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 974 #endif 975 PetscCall(MatSeqAIJGetArrayRead(A, &av)); 976 ai = aa->i; 977 aj = aa->j; 978 PetscCall(MatGetDiagonalMarkers_SeqAIJ(A, &adiag, &diagDense)); 979 if (reuse == MAT_INITIAL_MATRIX) { 980 /* count nz in the upper triangular part of A */ 981 nz = 0; 982 if (!diagDense) { 983 for (i = 0; i < M; i++) { 984 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 985 for (j = ai[i]; j < ai[i + 1]; j++) { 986 if (aj[j] < i) continue; 987 nz++; 988 } 989 } else { 990 nz += ai[i + 1] - adiag[i]; 991 } 992 } 993 } else { 994 for (i = 0; i < M; i++) nz += ai[i + 1] - adiag[i]; 995 } 996 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 997 PetscCall(PetscMalloc1(nz, &val)); 998 mumps->nnz = nz; 999 mumps->irn = row; 1000 mumps->jcn = col; 1001 mumps->val = mumps->val_alloc = val; 1002 1003 nz = 0; 1004 if (!diagDense) { 1005 for (i = 0; i < M; i++) { 1006 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1007 for (j = ai[i]; j < ai[i + 1]; j++) { 1008 if (aj[j] < i) continue; 1009 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1010 PetscCall(PetscMUMPSIntCast(aj[j] + shift, &col[nz])); 1011 val[nz] = av[j]; 1012 nz++; 1013 } 1014 } else { 1015 rnz = ai[i + 1] - adiag[i]; 1016 ajj = aj + adiag[i]; 1017 v1 = av + adiag[i]; 1018 for (j = 0; j < rnz; j++) { 1019 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1020 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1021 val[nz++] = v1[j]; 1022 } 1023 } 1024 } 1025 } else { 1026 for (i = 0; i < M; i++) { 1027 rnz = ai[i + 1] - adiag[i]; 1028 ajj = aj + adiag[i]; 1029 v1 = av + adiag[i]; 1030 for (j = 0; j < rnz; j++) { 1031 PetscCall(PetscMUMPSIntCast(i + shift, &row[nz])); 1032 PetscCall(PetscMUMPSIntCast(ajj[j] + shift, &col[nz])); 1033 val[nz++] = v1[j]; 1034 } 1035 } 1036 } 1037 } else { 1038 nz = 0; 1039 val = mumps->val; 1040 if (!diagDense) { 1041 for (i = 0; i < M; i++) { 1042 if (PetscUnlikely(adiag[i] >= ai[i + 1])) { 1043 for (j = ai[i]; j < ai[i + 1]; j++) { 1044 if (aj[j] < i) continue; 1045 val[nz++] = av[j]; 1046 } 1047 } else { 1048 rnz = ai[i + 1] - adiag[i]; 1049 v1 = av + adiag[i]; 1050 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1051 } 1052 } 1053 } else { 1054 for (i = 0; i < M; i++) { 1055 rnz = ai[i + 1] - adiag[i]; 1056 v1 = av + adiag[i]; 1057 for (j = 0; j < rnz; j++) val[nz++] = v1[j]; 1058 } 1059 } 1060 } 1061 PetscCall(MatSeqAIJRestoreArrayRead(A, &av)); 1062 PetscFunctionReturn(PETSC_SUCCESS); 1063 } 1064 1065 static PetscErrorCode MatConvertToTriples_mpisbaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1066 { 1067 const PetscInt *ai, *aj, *bi, *bj, *garray, *ajj, *bjj; 1068 PetscInt bs; 1069 PetscCount rstart, nz, i, j, k, m, jj, irow, countA, countB; 1070 PetscMUMPSInt *row, *col; 1071 const PetscScalar *av, *bv, *v1, *v2; 1072 PetscScalar *val; 1073 Mat_MPISBAIJ *mat = (Mat_MPISBAIJ *)A->data; 1074 Mat_SeqSBAIJ *aa = (Mat_SeqSBAIJ *)mat->A->data; 1075 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1076 const PetscInt bs2 = aa->bs2, mbs = aa->mbs; 1077 #if defined(PETSC_USE_COMPLEX) 1078 PetscBool hermitian, isset; 1079 #endif 1080 1081 PetscFunctionBegin; 1082 #if defined(PETSC_USE_COMPLEX) 1083 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1084 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1085 #endif 1086 PetscCall(MatGetBlockSize(A, &bs)); 1087 rstart = A->rmap->rstart; 1088 ai = aa->i; 1089 aj = aa->j; 1090 bi = bb->i; 1091 bj = bb->j; 1092 av = aa->a; 1093 bv = bb->a; 1094 1095 garray = mat->garray; 1096 1097 if (reuse == MAT_INITIAL_MATRIX) { 1098 nz = (aa->nz + bb->nz) * bs2; /* just a conservative estimate */ 1099 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1100 PetscCall(PetscMalloc1(nz, &val)); 1101 /* can not decide the exact mumps->nnz now because of the SBAIJ */ 1102 mumps->irn = row; 1103 mumps->jcn = col; 1104 mumps->val = mumps->val_alloc = val; 1105 } else { 1106 val = mumps->val; 1107 } 1108 1109 jj = 0; 1110 irow = rstart; 1111 for (i = 0; i < mbs; i++) { 1112 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1113 countA = ai[i + 1] - ai[i]; 1114 countB = bi[i + 1] - bi[i]; 1115 bjj = bj + bi[i]; 1116 v1 = av + ai[i] * bs2; 1117 v2 = bv + bi[i] * bs2; 1118 1119 if (bs > 1) { 1120 /* A-part */ 1121 for (j = 0; j < countA; j++) { 1122 for (k = 0; k < bs; k++) { 1123 for (m = 0; m < bs; m++) { 1124 if (rstart + ajj[j] * bs > irow || k >= m) { 1125 if (reuse == MAT_INITIAL_MATRIX) { 1126 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1127 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] * bs + k + shift, &col[jj])); 1128 } 1129 val[jj++] = v1[j * bs2 + m + k * bs]; 1130 } 1131 } 1132 } 1133 } 1134 1135 /* B-part */ 1136 for (j = 0; j < countB; j++) { 1137 for (k = 0; k < bs; k++) { 1138 for (m = 0; m < bs; m++) { 1139 if (reuse == MAT_INITIAL_MATRIX) { 1140 PetscCall(PetscMUMPSIntCast(irow + m + shift, &row[jj])); 1141 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] * bs + k + shift, &col[jj])); 1142 } 1143 val[jj++] = v2[j * bs2 + m + k * bs]; 1144 } 1145 } 1146 } 1147 } else { 1148 /* A-part */ 1149 for (j = 0; j < countA; j++) { 1150 if (reuse == MAT_INITIAL_MATRIX) { 1151 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1152 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1153 } 1154 val[jj++] = v1[j]; 1155 } 1156 1157 /* B-part */ 1158 for (j = 0; j < countB; j++) { 1159 if (reuse == MAT_INITIAL_MATRIX) { 1160 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1161 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1162 } 1163 val[jj++] = v2[j]; 1164 } 1165 } 1166 irow += bs; 1167 } 1168 if (reuse == MAT_INITIAL_MATRIX) mumps->nnz = jj; 1169 PetscFunctionReturn(PETSC_SUCCESS); 1170 } 1171 1172 static PetscErrorCode MatConvertToTriples_mpiaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1173 { 1174 const PetscInt *ai, *aj, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1175 PetscCount rstart, cstart, nz, i, j, jj, irow, countA, countB; 1176 PetscMUMPSInt *row, *col; 1177 const PetscScalar *av, *bv, *v1, *v2; 1178 PetscScalar *val; 1179 Mat Ad, Ao; 1180 Mat_SeqAIJ *aa; 1181 Mat_SeqAIJ *bb; 1182 1183 PetscFunctionBegin; 1184 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 1185 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1186 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1187 1188 aa = (Mat_SeqAIJ *)Ad->data; 1189 bb = (Mat_SeqAIJ *)Ao->data; 1190 ai = aa->i; 1191 aj = aa->j; 1192 bi = bb->i; 1193 bj = bb->j; 1194 1195 rstart = A->rmap->rstart; 1196 cstart = A->cmap->rstart; 1197 1198 if (reuse == MAT_INITIAL_MATRIX) { 1199 nz = (PetscCount)aa->nz + bb->nz; /* make sure the sum won't overflow PetscInt */ 1200 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1201 PetscCall(PetscMalloc1(nz, &val)); 1202 mumps->nnz = nz; 1203 mumps->irn = row; 1204 mumps->jcn = col; 1205 mumps->val = mumps->val_alloc = val; 1206 } else { 1207 val = mumps->val; 1208 } 1209 1210 jj = 0; 1211 irow = rstart; 1212 for (i = 0; i < m; i++) { 1213 ajj = aj + ai[i]; /* ptr to the beginning of this row */ 1214 countA = ai[i + 1] - ai[i]; 1215 countB = bi[i + 1] - bi[i]; 1216 bjj = bj + bi[i]; 1217 v1 = av + ai[i]; 1218 v2 = bv + bi[i]; 1219 1220 /* A-part */ 1221 for (j = 0; j < countA; j++) { 1222 if (reuse == MAT_INITIAL_MATRIX) { 1223 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1224 PetscCall(PetscMUMPSIntCast(cstart + ajj[j] + shift, &col[jj])); 1225 } 1226 val[jj++] = v1[j]; 1227 } 1228 1229 /* B-part */ 1230 for (j = 0; j < countB; j++) { 1231 if (reuse == MAT_INITIAL_MATRIX) { 1232 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1233 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1234 } 1235 val[jj++] = v2[j]; 1236 } 1237 irow++; 1238 } 1239 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 1240 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 static PetscErrorCode MatConvertToTriples_mpibaij_mpiaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1245 { 1246 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)A->data; 1247 Mat_SeqBAIJ *aa = (Mat_SeqBAIJ *)mat->A->data; 1248 Mat_SeqBAIJ *bb = (Mat_SeqBAIJ *)mat->B->data; 1249 const PetscInt *ai = aa->i, *bi = bb->i, *aj = aa->j, *bj = bb->j, *ajj, *bjj; 1250 const PetscInt *garray = mat->garray, mbs = mat->mbs, rstart = A->rmap->rstart, cstart = A->cmap->rstart; 1251 const PetscInt bs2 = mat->bs2; 1252 PetscInt bs; 1253 PetscCount nz, i, j, k, n, jj, irow, countA, countB, idx; 1254 PetscMUMPSInt *row, *col; 1255 const PetscScalar *av = aa->a, *bv = bb->a, *v1, *v2; 1256 PetscScalar *val; 1257 1258 PetscFunctionBegin; 1259 PetscCall(MatGetBlockSize(A, &bs)); 1260 if (reuse == MAT_INITIAL_MATRIX) { 1261 nz = bs2 * (aa->nz + bb->nz); 1262 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1263 PetscCall(PetscMalloc1(nz, &val)); 1264 mumps->nnz = nz; 1265 mumps->irn = row; 1266 mumps->jcn = col; 1267 mumps->val = mumps->val_alloc = val; 1268 } else { 1269 val = mumps->val; 1270 } 1271 1272 jj = 0; 1273 irow = rstart; 1274 for (i = 0; i < mbs; i++) { 1275 countA = ai[i + 1] - ai[i]; 1276 countB = bi[i + 1] - bi[i]; 1277 ajj = aj + ai[i]; 1278 bjj = bj + bi[i]; 1279 v1 = av + bs2 * ai[i]; 1280 v2 = bv + bs2 * bi[i]; 1281 1282 idx = 0; 1283 /* A-part */ 1284 for (k = 0; k < countA; k++) { 1285 for (j = 0; j < bs; j++) { 1286 for (n = 0; n < bs; n++) { 1287 if (reuse == MAT_INITIAL_MATRIX) { 1288 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1289 PetscCall(PetscMUMPSIntCast(cstart + bs * ajj[k] + j + shift, &col[jj])); 1290 } 1291 val[jj++] = v1[idx++]; 1292 } 1293 } 1294 } 1295 1296 idx = 0; 1297 /* B-part */ 1298 for (k = 0; k < countB; k++) { 1299 for (j = 0; j < bs; j++) { 1300 for (n = 0; n < bs; n++) { 1301 if (reuse == MAT_INITIAL_MATRIX) { 1302 PetscCall(PetscMUMPSIntCast(irow + n + shift, &row[jj])); 1303 PetscCall(PetscMUMPSIntCast(bs * garray[bjj[k]] + j + shift, &col[jj])); 1304 } 1305 val[jj++] = v2[idx++]; 1306 } 1307 } 1308 } 1309 irow += bs; 1310 } 1311 PetscFunctionReturn(PETSC_SUCCESS); 1312 } 1313 1314 static PetscErrorCode MatConvertToTriples_mpiaij_mpisbaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1315 { 1316 const PetscInt *ai, *aj, *adiag, *bi, *bj, *garray, m = A->rmap->n, *ajj, *bjj; 1317 PetscCount rstart, nz, nza, nzb, i, j, jj, irow, countA, countB; 1318 PetscMUMPSInt *row, *col; 1319 const PetscScalar *av, *bv, *v1, *v2; 1320 PetscScalar *val; 1321 Mat Ad, Ao; 1322 Mat_SeqAIJ *aa; 1323 Mat_SeqAIJ *bb; 1324 #if defined(PETSC_USE_COMPLEX) 1325 PetscBool hermitian, isset; 1326 #endif 1327 1328 PetscFunctionBegin; 1329 #if defined(PETSC_USE_COMPLEX) 1330 PetscCall(MatIsHermitianKnown(A, &isset, &hermitian)); 1331 PetscCheck(!isset || !hermitian, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MUMPS does not support Hermitian symmetric matrices for Choleksy"); 1332 #endif 1333 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &garray)); 1334 PetscCall(MatSeqAIJGetArrayRead(Ad, &av)); 1335 PetscCall(MatSeqAIJGetArrayRead(Ao, &bv)); 1336 1337 aa = (Mat_SeqAIJ *)Ad->data; 1338 bb = (Mat_SeqAIJ *)Ao->data; 1339 ai = aa->i; 1340 aj = aa->j; 1341 bi = bb->i; 1342 bj = bb->j; 1343 PetscCall(MatGetDiagonalMarkers_SeqAIJ(Ad, &adiag, NULL)); 1344 rstart = A->rmap->rstart; 1345 1346 if (reuse == MAT_INITIAL_MATRIX) { 1347 nza = 0; /* num of upper triangular entries in mat->A, including diagonals */ 1348 nzb = 0; /* num of upper triangular entries in mat->B */ 1349 for (i = 0; i < m; i++) { 1350 nza += (ai[i + 1] - adiag[i]); 1351 countB = bi[i + 1] - bi[i]; 1352 bjj = bj + bi[i]; 1353 for (j = 0; j < countB; j++) { 1354 if (garray[bjj[j]] > rstart) nzb++; 1355 } 1356 } 1357 1358 nz = nza + nzb; /* total nz of upper triangular part of mat */ 1359 PetscCall(PetscMalloc2(nz, &row, nz, &col)); 1360 PetscCall(PetscMalloc1(nz, &val)); 1361 mumps->nnz = nz; 1362 mumps->irn = row; 1363 mumps->jcn = col; 1364 mumps->val = mumps->val_alloc = val; 1365 } else { 1366 val = mumps->val; 1367 } 1368 1369 jj = 0; 1370 irow = rstart; 1371 for (i = 0; i < m; i++) { 1372 ajj = aj + adiag[i]; /* ptr to the beginning of the diagonal of this row */ 1373 v1 = av + adiag[i]; 1374 countA = ai[i + 1] - adiag[i]; 1375 countB = bi[i + 1] - bi[i]; 1376 bjj = bj + bi[i]; 1377 v2 = bv + bi[i]; 1378 1379 /* A-part */ 1380 for (j = 0; j < countA; j++) { 1381 if (reuse == MAT_INITIAL_MATRIX) { 1382 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1383 PetscCall(PetscMUMPSIntCast(rstart + ajj[j] + shift, &col[jj])); 1384 } 1385 val[jj++] = v1[j]; 1386 } 1387 1388 /* B-part */ 1389 for (j = 0; j < countB; j++) { 1390 if (garray[bjj[j]] > rstart) { 1391 if (reuse == MAT_INITIAL_MATRIX) { 1392 PetscCall(PetscMUMPSIntCast(irow + shift, &row[jj])); 1393 PetscCall(PetscMUMPSIntCast(garray[bjj[j]] + shift, &col[jj])); 1394 } 1395 val[jj++] = v2[j]; 1396 } 1397 } 1398 irow++; 1399 } 1400 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &av)); 1401 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &bv)); 1402 PetscFunctionReturn(PETSC_SUCCESS); 1403 } 1404 1405 static PetscErrorCode MatConvertToTriples_diagonal_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1406 { 1407 const PetscScalar *av; 1408 const PetscInt M = A->rmap->n; 1409 PetscCount i; 1410 PetscMUMPSInt *row, *col; 1411 Vec v; 1412 1413 PetscFunctionBegin; 1414 PetscCall(MatDiagonalGetDiagonal(A, &v)); 1415 PetscCall(VecGetArrayRead(v, &av)); 1416 if (reuse == MAT_INITIAL_MATRIX) { 1417 PetscCall(PetscMalloc2(M, &row, M, &col)); 1418 for (i = 0; i < M; i++) { 1419 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1420 col[i] = row[i]; 1421 } 1422 mumps->val = (PetscScalar *)av; 1423 mumps->irn = row; 1424 mumps->jcn = col; 1425 mumps->nnz = M; 1426 } else if (mumps->nest_vals) PetscCall(PetscArraycpy(mumps->val, av, M)); /* MatConvertToTriples_nest_xaij() allocates mumps->val outside of MatConvertToTriples_diagonal_xaij(), so one needs to copy the memory */ 1427 else mumps->val = (PetscScalar *)av; /* in the default case, mumps->val is never allocated, one just needs to update the mumps->val pointer */ 1428 PetscCall(VecRestoreArrayRead(v, &av)); 1429 PetscFunctionReturn(PETSC_SUCCESS); 1430 } 1431 1432 static PetscErrorCode MatConvertToTriples_dense_xaij(Mat A, PETSC_UNUSED PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1433 { 1434 PetscScalar *v; 1435 const PetscInt m = A->rmap->n, N = A->cmap->N; 1436 PetscInt lda; 1437 PetscCount i, j; 1438 PetscMUMPSInt *row, *col; 1439 1440 PetscFunctionBegin; 1441 PetscCall(MatDenseGetArray(A, &v)); 1442 PetscCall(MatDenseGetLDA(A, &lda)); 1443 if (reuse == MAT_INITIAL_MATRIX) { 1444 PetscCall(PetscMalloc2(m * N, &row, m * N, &col)); 1445 for (i = 0; i < m; i++) { 1446 col[i] = 0; 1447 PetscCall(PetscMUMPSIntCast(i + A->rmap->rstart, &row[i])); 1448 } 1449 for (j = 1; j < N; j++) { 1450 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(j, col + i + m * j)); 1451 PetscCall(PetscArraycpy(row + m * j, row + m * (j - 1), m)); 1452 } 1453 if (lda == m) mumps->val = v; 1454 else { 1455 PetscCall(PetscMalloc1(m * N, &mumps->val)); 1456 mumps->val_alloc = mumps->val; 1457 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1458 } 1459 mumps->irn = row; 1460 mumps->jcn = col; 1461 mumps->nnz = m * N; 1462 } else { 1463 if (lda == m && !mumps->nest_vals) mumps->val = v; 1464 else { 1465 for (j = 0; j < N; j++) PetscCall(PetscArraycpy(mumps->val + m * j, v + lda * j, m)); 1466 } 1467 } 1468 PetscCall(MatDenseRestoreArray(A, &v)); 1469 PetscFunctionReturn(PETSC_SUCCESS); 1470 } 1471 1472 // If the input Mat (sub) is either MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL, this function gets the parent Mat until it is not a 1473 // MATTRANSPOSEVIRTUAL or MATHERMITIANTRANSPOSEVIRTUAL itself and returns the appropriate shift, scaling, and whether the parent Mat should be conjugated 1474 // and its rows and columns permuted 1475 // TODO FIXME: this should not be in this file and should instead be refactored where the same logic applies, e.g., MatAXPY_Dense_Nest() 1476 static PetscErrorCode MatGetTranspose_TransposeVirtual(Mat *sub, PetscBool *conjugate, PetscScalar *vshift, PetscScalar *vscale, PetscBool *swap) 1477 { 1478 Mat A; 1479 PetscScalar s[2]; 1480 PetscBool isTrans, isHTrans, compare; 1481 1482 PetscFunctionBegin; 1483 do { 1484 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATTRANSPOSEVIRTUAL, &isTrans)); 1485 if (isTrans) { 1486 PetscCall(MatTransposeGetMat(*sub, &A)); 1487 isHTrans = PETSC_FALSE; 1488 } else { 1489 PetscCall(PetscObjectTypeCompare((PetscObject)*sub, MATHERMITIANTRANSPOSEVIRTUAL, &isHTrans)); 1490 if (isHTrans) PetscCall(MatHermitianTransposeGetMat(*sub, &A)); 1491 } 1492 compare = (PetscBool)(isTrans || isHTrans); 1493 if (compare) { 1494 if (vshift && vscale) { 1495 PetscCall(MatShellGetScalingShifts(*sub, s, s + 1, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 1496 if (!*conjugate) { 1497 *vshift += s[0] * *vscale; 1498 *vscale *= s[1]; 1499 } else { 1500 *vshift += PetscConj(s[0]) * *vscale; 1501 *vscale *= PetscConj(s[1]); 1502 } 1503 } 1504 if (swap) *swap = (PetscBool)!*swap; 1505 if (isHTrans && conjugate) *conjugate = (PetscBool)!*conjugate; 1506 *sub = A; 1507 } 1508 } while (compare); 1509 PetscFunctionReturn(PETSC_SUCCESS); 1510 } 1511 1512 static PetscErrorCode MatConvertToTriples_nest_xaij(Mat A, PetscInt shift, MatReuse reuse, Mat_MUMPS *mumps) 1513 { 1514 Mat **mats; 1515 PetscInt nr, nc; 1516 PetscBool chol = mumps->sym ? PETSC_TRUE : PETSC_FALSE; 1517 1518 PetscFunctionBegin; 1519 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 1520 if (reuse == MAT_INITIAL_MATRIX) { 1521 PetscMUMPSInt *irns, *jcns; 1522 PetscScalar *vals; 1523 PetscCount totnnz, cumnnz, maxnnz; 1524 PetscInt *pjcns_w, Mbs = 0; 1525 IS *rows, *cols; 1526 PetscInt **rows_idx, **cols_idx; 1527 1528 cumnnz = 0; 1529 maxnnz = 0; 1530 PetscCall(PetscMalloc2(nr * nc + 1, &mumps->nest_vals_start, nr * nc, &mumps->nest_convert_to_triples)); 1531 for (PetscInt r = 0; r < nr; r++) { 1532 for (PetscInt c = 0; c < nc; c++) { 1533 Mat sub = mats[r][c]; 1534 1535 mumps->nest_convert_to_triples[r * nc + c] = NULL; 1536 if (chol && c < r) continue; /* skip lower-triangular block for Cholesky */ 1537 if (sub) { 1538 PetscErrorCode (*convert_to_triples)(Mat, PetscInt, MatReuse, Mat_MUMPS *) = NULL; 1539 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 1540 MatInfo info; 1541 1542 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 1543 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 1544 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 1545 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 1546 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 1547 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 1548 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 1549 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 1550 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 1551 1552 if (chol) { 1553 if (r == c) { 1554 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqsbaij; 1555 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpisbaij; 1556 else if (isSeqSBAIJ) convert_to_triples = MatConvertToTriples_seqsbaij_seqsbaij; 1557 else if (isMPISBAIJ) convert_to_triples = MatConvertToTriples_mpisbaij_mpisbaij; 1558 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1559 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1560 } else { 1561 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1562 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1563 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1564 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1565 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1566 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1567 } 1568 } else { 1569 if (isSeqAIJ) convert_to_triples = MatConvertToTriples_seqaij_seqaij; 1570 else if (isMPIAIJ) convert_to_triples = MatConvertToTriples_mpiaij_mpiaij; 1571 else if (isSeqBAIJ) convert_to_triples = MatConvertToTriples_seqbaij_seqaij; 1572 else if (isMPIBAIJ) convert_to_triples = MatConvertToTriples_mpibaij_mpiaij; 1573 else if (isDiag) convert_to_triples = MatConvertToTriples_diagonal_xaij; 1574 else if (isDense) convert_to_triples = MatConvertToTriples_dense_xaij; 1575 } 1576 PetscCheck(convert_to_triples, PetscObjectComm((PetscObject)sub), PETSC_ERR_SUP, "Not for block of type %s", ((PetscObject)sub)->type_name); 1577 mumps->nest_convert_to_triples[r * nc + c] = convert_to_triples; 1578 PetscCall(MatGetInfo(sub, MAT_LOCAL, &info)); 1579 cumnnz += (PetscCount)info.nz_used; /* can be overestimated for Cholesky */ 1580 maxnnz = PetscMax(maxnnz, info.nz_used); 1581 } 1582 } 1583 } 1584 1585 /* Allocate total COO */ 1586 totnnz = cumnnz; 1587 PetscCall(PetscMalloc2(totnnz, &irns, totnnz, &jcns)); 1588 PetscCall(PetscMalloc1(totnnz, &vals)); 1589 1590 /* Handle rows and column maps 1591 We directly map rows and use an SF for the columns */ 1592 PetscCall(PetscMalloc4(nr, &rows, nc, &cols, nr, &rows_idx, nc, &cols_idx)); 1593 PetscCall(MatNestGetISs(A, rows, cols)); 1594 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1595 for (PetscInt c = 0; c < nc; c++) PetscCall(ISGetIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1596 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscMalloc1(maxnnz, &pjcns_w)); 1597 else (void)maxnnz; 1598 1599 cumnnz = 0; 1600 for (PetscInt r = 0; r < nr; r++) { 1601 for (PetscInt c = 0; c < nc; c++) { 1602 Mat sub = mats[r][c]; 1603 const PetscInt *ridx = rows_idx[r]; 1604 const PetscInt *cidx = cols_idx[c]; 1605 PetscScalar vscale = 1.0, vshift = 0.0; 1606 PetscInt rst, size, bs; 1607 PetscSF csf; 1608 PetscBool conjugate = PETSC_FALSE, swap = PETSC_FALSE; 1609 PetscLayout cmap; 1610 PetscInt innz; 1611 1612 mumps->nest_vals_start[r * nc + c] = cumnnz; 1613 if (c == r) { 1614 PetscCall(ISGetSize(rows[r], &size)); 1615 if (!mumps->nest_convert_to_triples[r * nc + c]) { 1616 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1617 } 1618 PetscCall(MatGetBlockSize(sub, &bs)); 1619 Mbs += size / bs; 1620 } 1621 if (!mumps->nest_convert_to_triples[r * nc + c]) continue; 1622 1623 /* Extract inner blocks if needed */ 1624 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, &swap)); 1625 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1626 1627 /* Get column layout to map off-process columns */ 1628 PetscCall(MatGetLayouts(sub, NULL, &cmap)); 1629 1630 /* Get row start to map on-process rows */ 1631 PetscCall(MatGetOwnershipRange(sub, &rst, NULL)); 1632 1633 /* Directly use the mumps datastructure and use C ordering for now */ 1634 PetscCall((*mumps->nest_convert_to_triples[r * nc + c])(sub, 0, MAT_INITIAL_MATRIX, mumps)); 1635 1636 /* Swap the role of rows and columns indices for transposed blocks 1637 since we need values with global final ordering */ 1638 if (swap) { 1639 cidx = rows_idx[r]; 1640 ridx = cols_idx[c]; 1641 } 1642 1643 /* Communicate column indices 1644 This could have been done with a single SF but it would have complicated the code a lot. 1645 But since we do it only once, we pay the price of setting up an SF for each block */ 1646 if (PetscDefined(USE_64BIT_INDICES)) { 1647 for (PetscInt k = 0; k < mumps->nnz; k++) pjcns_w[k] = mumps->jcn[k]; 1648 } else pjcns_w = (PetscInt *)mumps->jcn; /* This cast is needed only to silence warnings for 64bit integers builds */ 1649 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &csf)); 1650 PetscCall(PetscIntCast(mumps->nnz, &innz)); 1651 PetscCall(PetscSFSetGraphLayout(csf, cmap, innz, NULL, PETSC_OWN_POINTER, pjcns_w)); 1652 PetscCall(PetscSFBcastBegin(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1653 PetscCall(PetscSFBcastEnd(csf, MPIU_INT, cidx, pjcns_w, MPI_REPLACE)); 1654 PetscCall(PetscSFDestroy(&csf)); 1655 1656 /* Import indices: use direct map for rows and mapped indices for columns */ 1657 if (swap) { 1658 for (PetscInt k = 0; k < mumps->nnz; k++) { 1659 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &jcns[cumnnz + k])); 1660 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &irns[cumnnz + k])); 1661 } 1662 } else { 1663 for (PetscInt k = 0; k < mumps->nnz; k++) { 1664 PetscCall(PetscMUMPSIntCast(ridx[mumps->irn[k] - rst] + shift, &irns[cumnnz + k])); 1665 PetscCall(PetscMUMPSIntCast(pjcns_w[k] + shift, &jcns[cumnnz + k])); 1666 } 1667 } 1668 1669 /* Import values to full COO */ 1670 if (conjugate) { /* conjugate the entries */ 1671 PetscScalar *v = vals + cumnnz; 1672 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * PetscConj(mumps->val[k]); 1673 } else if (vscale != 1.0) { 1674 PetscScalar *v = vals + cumnnz; 1675 for (PetscInt k = 0; k < mumps->nnz; k++) v[k] = vscale * mumps->val[k]; 1676 } else PetscCall(PetscArraycpy(vals + cumnnz, mumps->val, mumps->nnz)); 1677 1678 /* Shift new starting point and sanity check */ 1679 cumnnz += mumps->nnz; 1680 PetscCheck(cumnnz <= totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1681 1682 /* Free scratch memory */ 1683 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1684 PetscCall(PetscFree(mumps->val_alloc)); 1685 mumps->val = NULL; 1686 mumps->nnz = 0; 1687 } 1688 } 1689 if (mumps->id.ICNTL(15) == 1) { 1690 if (Mbs != A->rmap->N) { 1691 PetscMPIInt rank, size; 1692 1693 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 1694 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 1695 if (rank == 0) { 1696 PetscInt shift = 0; 1697 1698 PetscCall(PetscMUMPSIntCast(Mbs, &mumps->id.nblk)); 1699 PetscCall(PetscFree(mumps->id.blkptr)); 1700 PetscCall(PetscMalloc1(Mbs + 1, &mumps->id.blkptr)); 1701 mumps->id.blkptr[0] = 1; 1702 for (PetscInt i = 0; i < size; ++i) { 1703 for (PetscInt r = 0; r < nr; r++) { 1704 Mat sub = mats[r][r]; 1705 const PetscInt *ranges; 1706 PetscInt bs; 1707 1708 for (PetscInt c = 0; c < nc && !sub; ++c) sub = mats[r][c]; // diagonal Mat is NULL, so start over from the beginning of the current row 1709 PetscCall(MatGetOwnershipRanges(sub, &ranges)); 1710 PetscCall(MatGetBlockSize(sub, &bs)); 1711 for (PetscInt j = 0, start = mumps->id.blkptr[shift] + bs; j < ranges[i + 1] - ranges[i]; j += bs) PetscCall(PetscMUMPSIntCast(start + j, mumps->id.blkptr + shift + j / bs + 1)); 1712 shift += (ranges[i + 1] - ranges[i]) / bs; 1713 } 1714 } 1715 } 1716 } else mumps->id.ICNTL(15) = 0; 1717 } 1718 if (PetscDefined(USE_64BIT_INDICES)) PetscCall(PetscFree(pjcns_w)); 1719 for (PetscInt r = 0; r < nr; r++) PetscCall(ISRestoreIndices(rows[r], (const PetscInt **)&rows_idx[r])); 1720 for (PetscInt c = 0; c < nc; c++) PetscCall(ISRestoreIndices(cols[c], (const PetscInt **)&cols_idx[c])); 1721 PetscCall(PetscFree4(rows, cols, rows_idx, cols_idx)); 1722 if (!chol) PetscCheck(cumnnz == totnnz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Different number of nonzeros %" PetscCount_FMT " != %" PetscCount_FMT, cumnnz, totnnz); 1723 mumps->nest_vals_start[nr * nc] = cumnnz; 1724 1725 /* Set pointers for final MUMPS data structure */ 1726 mumps->nest_vals = vals; 1727 mumps->val_alloc = NULL; /* do not use val_alloc since it may be reallocated with the OMP callpath */ 1728 mumps->val = vals; 1729 mumps->irn = irns; 1730 mumps->jcn = jcns; 1731 mumps->nnz = cumnnz; 1732 } else { 1733 PetscScalar *oval = mumps->nest_vals; 1734 for (PetscInt r = 0; r < nr; r++) { 1735 for (PetscInt c = 0; c < nc; c++) { 1736 PetscBool conjugate = PETSC_FALSE; 1737 Mat sub = mats[r][c]; 1738 PetscScalar vscale = 1.0, vshift = 0.0; 1739 PetscInt midx = r * nc + c; 1740 1741 if (!mumps->nest_convert_to_triples[midx]) continue; 1742 PetscCall(MatGetTranspose_TransposeVirtual(&sub, &conjugate, &vshift, &vscale, NULL)); 1743 PetscCheck(vshift == 0.0, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Nonzero shift in parent MatShell"); 1744 mumps->val = oval + mumps->nest_vals_start[midx]; 1745 PetscCall((*mumps->nest_convert_to_triples[midx])(sub, shift, MAT_REUSE_MATRIX, mumps)); 1746 if (conjugate) { 1747 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1748 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] = vscale * PetscConj(mumps->val[k]); 1749 } else if (vscale != 1.0) { 1750 PetscCount nnz = mumps->nest_vals_start[midx + 1] - mumps->nest_vals_start[midx]; 1751 for (PetscCount k = 0; k < nnz; k++) mumps->val[k] *= vscale; 1752 } 1753 } 1754 } 1755 mumps->val = oval; 1756 } 1757 PetscFunctionReturn(PETSC_SUCCESS); 1758 } 1759 1760 static PetscErrorCode MatDestroy_MUMPS(Mat A) 1761 { 1762 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1763 1764 PetscFunctionBegin; 1765 PetscCall(PetscFree(mumps->id.isol_loc)); 1766 PetscCall(VecScatterDestroy(&mumps->scat_rhs)); 1767 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1768 PetscCall(VecDestroy(&mumps->b_seq)); 1769 PetscCall(VecDestroy(&mumps->x_seq)); 1770 PetscCall(PetscFree(mumps->id.perm_in)); 1771 PetscCall(PetscFree(mumps->id.blkvar)); 1772 PetscCall(PetscFree(mumps->id.blkptr)); 1773 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 1774 PetscCall(PetscFree(mumps->val_alloc)); 1775 PetscCall(PetscFree(mumps->info)); 1776 PetscCall(PetscFree(mumps->ICNTL_pre)); 1777 PetscCall(PetscFree(mumps->CNTL_pre)); 1778 PetscCall(MatMumpsResetSchur_Private(mumps)); 1779 if (mumps->id.job != JOB_NULL) { /* cannot call PetscMUMPS_c() if JOB_INIT has never been called for this instance */ 1780 mumps->id.job = JOB_END; 1781 PetscMUMPS_c(mumps); 1782 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in termination: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 1783 if (mumps->mumps_comm != MPI_COMM_NULL) { 1784 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) PetscCallMPI(MPI_Comm_free(&mumps->mumps_comm)); 1785 else PetscCall(PetscCommRestoreComm(PetscObjectComm((PetscObject)A), &mumps->mumps_comm)); 1786 } 1787 } 1788 PetscCall(MatMumpsFreeInternalID(&mumps->id)); 1789 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1790 if (mumps->use_petsc_omp_support) { 1791 PetscCall(PetscOmpCtrlDestroy(&mumps->omp_ctrl)); 1792 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1793 PetscCall(PetscFree3(mumps->rhs_nrow, mumps->rhs_recvcounts, mumps->rhs_disps)); 1794 } 1795 #endif 1796 PetscCall(PetscFree(mumps->ia_alloc)); 1797 PetscCall(PetscFree(mumps->ja_alloc)); 1798 PetscCall(PetscFree(mumps->recvcount)); 1799 PetscCall(PetscFree(mumps->reqs)); 1800 PetscCall(PetscFree(mumps->irhs_loc)); 1801 PetscCall(PetscFree2(mumps->nest_vals_start, mumps->nest_convert_to_triples)); 1802 PetscCall(PetscFree(mumps->nest_vals)); 1803 PetscCall(PetscFree(A->data)); 1804 1805 /* clear composed functions */ 1806 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorGetSolverType_C", NULL)); 1807 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorSetSchurIS_C", NULL)); 1808 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatFactorCreateSchurComplement_C", NULL)); 1809 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetIcntl_C", NULL)); 1810 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetIcntl_C", NULL)); 1811 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetCntl_C", NULL)); 1812 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetCntl_C", NULL)); 1813 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfo_C", NULL)); 1814 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInfog_C", NULL)); 1815 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfo_C", NULL)); 1816 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetRinfog_C", NULL)); 1817 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetNullPivots_C", NULL)); 1818 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverse_C", NULL)); 1819 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsGetInverseTranspose_C", NULL)); 1820 PetscCall(PetscObjectComposeFunction((PetscObject)A, "MatMumpsSetBlk_C", NULL)); 1821 PetscFunctionReturn(PETSC_SUCCESS); 1822 } 1823 1824 /* Set up the distributed RHS info for MUMPS. <nrhs> is the number of RHS. <array> points to start of RHS on the local processor. */ 1825 static PetscErrorCode MatMumpsSetUpDistRHSInfo(Mat A, PetscInt nrhs, const PetscScalar *array) 1826 { 1827 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1828 const PetscMPIInt ompsize = mumps->omp_comm_size; 1829 PetscInt i, m, M, rstart; 1830 1831 PetscFunctionBegin; 1832 PetscCall(MatGetSize(A, &M, NULL)); 1833 PetscCall(MatGetLocalSize(A, &m, NULL)); 1834 PetscCheck(M <= PETSC_MUMPS_INT_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscInt too long for PetscMUMPSInt"); 1835 if (ompsize == 1) { 1836 if (!mumps->irhs_loc) { 1837 mumps->nloc_rhs = (PetscMUMPSInt)m; 1838 PetscCall(PetscMalloc1(m, &mumps->irhs_loc)); 1839 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 1840 for (i = 0; i < m; i++) PetscCall(PetscMUMPSIntCast(rstart + i + 1, &mumps->irhs_loc[i])); /* use 1-based indices */ 1841 } 1842 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, m * nrhs, array, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 1843 } else { 1844 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 1845 const PetscInt *ranges; 1846 PetscMPIInt j, k, sendcount, *petsc_ranks, *omp_ranks; 1847 MPI_Group petsc_group, omp_group; 1848 PetscScalar *recvbuf = NULL; 1849 1850 if (mumps->is_omp_master) { 1851 /* Lazily initialize the omp stuff for distributed rhs */ 1852 if (!mumps->irhs_loc) { 1853 PetscCall(PetscMalloc2(ompsize, &omp_ranks, ompsize, &petsc_ranks)); 1854 PetscCall(PetscMalloc3(ompsize, &mumps->rhs_nrow, ompsize, &mumps->rhs_recvcounts, ompsize, &mumps->rhs_disps)); 1855 PetscCallMPI(MPI_Comm_group(mumps->petsc_comm, &petsc_group)); 1856 PetscCallMPI(MPI_Comm_group(mumps->omp_comm, &omp_group)); 1857 for (j = 0; j < ompsize; j++) omp_ranks[j] = j; 1858 PetscCallMPI(MPI_Group_translate_ranks(omp_group, ompsize, omp_ranks, petsc_group, petsc_ranks)); 1859 1860 /* Populate mumps->irhs_loc[], rhs_nrow[] */ 1861 mumps->nloc_rhs = 0; 1862 PetscCall(MatGetOwnershipRanges(A, &ranges)); 1863 for (j = 0; j < ompsize; j++) { 1864 mumps->rhs_nrow[j] = ranges[petsc_ranks[j] + 1] - ranges[petsc_ranks[j]]; 1865 mumps->nloc_rhs += mumps->rhs_nrow[j]; 1866 } 1867 PetscCall(PetscMalloc1(mumps->nloc_rhs, &mumps->irhs_loc)); 1868 for (j = k = 0; j < ompsize; j++) { 1869 for (i = ranges[petsc_ranks[j]]; i < ranges[petsc_ranks[j] + 1]; i++, k++) PetscCall(PetscMUMPSIntCast(i + 1, &mumps->irhs_loc[k])); /* uses 1-based indices */ 1870 } 1871 1872 PetscCall(PetscFree2(omp_ranks, petsc_ranks)); 1873 PetscCallMPI(MPI_Group_free(&petsc_group)); 1874 PetscCallMPI(MPI_Group_free(&omp_group)); 1875 } 1876 1877 /* Realloc buffers when current nrhs is bigger than what we have met */ 1878 if (nrhs > mumps->max_nrhs) { 1879 PetscCall(PetscFree2(mumps->rhs_loc, mumps->rhs_recvbuf)); 1880 PetscCall(PetscMalloc2(mumps->nloc_rhs * nrhs, &mumps->rhs_loc, mumps->nloc_rhs * nrhs, &mumps->rhs_recvbuf)); 1881 mumps->max_nrhs = nrhs; 1882 } 1883 1884 /* Setup recvcounts[], disps[], recvbuf on omp rank 0 for the upcoming MPI_Gatherv */ 1885 for (j = 0; j < ompsize; j++) PetscCall(PetscMPIIntCast(mumps->rhs_nrow[j] * nrhs, &mumps->rhs_recvcounts[j])); 1886 mumps->rhs_disps[0] = 0; 1887 for (j = 1; j < ompsize; j++) { 1888 mumps->rhs_disps[j] = mumps->rhs_disps[j - 1] + mumps->rhs_recvcounts[j - 1]; 1889 PetscCheck(mumps->rhs_disps[j] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "PetscMPIInt overflow!"); 1890 } 1891 recvbuf = (nrhs == 1) ? mumps->rhs_loc : mumps->rhs_recvbuf; /* Directly use rhs_loc[] as recvbuf. Single rhs is common in Ax=b */ 1892 } 1893 1894 PetscCall(PetscMPIIntCast(m * nrhs, &sendcount)); 1895 PetscCallMPI(MPI_Gatherv(array, sendcount, MPIU_SCALAR, recvbuf, mumps->rhs_recvcounts, mumps->rhs_disps, MPIU_SCALAR, 0, mumps->omp_comm)); 1896 1897 if (mumps->is_omp_master) { 1898 if (nrhs > 1) { /* Copy & re-arrange data from rhs_recvbuf[] to mumps->rhs_loc[] only when there are multiple rhs */ 1899 PetscScalar *dst, *dstbase = mumps->rhs_loc; 1900 for (j = 0; j < ompsize; j++) { 1901 const PetscScalar *src = mumps->rhs_recvbuf + mumps->rhs_disps[j]; 1902 dst = dstbase; 1903 for (i = 0; i < nrhs; i++) { 1904 PetscCall(PetscArraycpy(dst, src, mumps->rhs_nrow[j])); 1905 src += mumps->rhs_nrow[j]; 1906 dst += mumps->nloc_rhs; 1907 } 1908 dstbase += mumps->rhs_nrow[j]; 1909 } 1910 } 1911 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nloc_rhs * nrhs, mumps->rhs_loc, mumps->id.precision, &mumps->id.rhs_loc_len, &mumps->id.rhs_loc)); 1912 } 1913 #endif /* PETSC_HAVE_OPENMP_SUPPORT */ 1914 } 1915 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 1916 mumps->id.nloc_rhs = (PetscMUMPSInt)mumps->nloc_rhs; 1917 mumps->id.lrhs_loc = mumps->nloc_rhs; 1918 mumps->id.irhs_loc = mumps->irhs_loc; 1919 PetscFunctionReturn(PETSC_SUCCESS); 1920 } 1921 1922 static PetscErrorCode MatSolve_MUMPS(Mat A, Vec b, Vec x) 1923 { 1924 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 1925 const PetscScalar *barray = NULL; 1926 PetscScalar *array; 1927 IS is_iden, is_petsc; 1928 PetscInt i; 1929 PetscBool second_solve = PETSC_FALSE; 1930 static PetscBool cite1 = PETSC_FALSE, cite2 = PETSC_FALSE; 1931 1932 PetscFunctionBegin; 1933 PetscCall(PetscCitationsRegister("@article{MUMPS01,\n author = {P.~R. Amestoy and I.~S. Duff and J.-Y. L'Excellent and J. Koster},\n title = {A fully asynchronous multifrontal solver using distributed dynamic scheduling},\n journal = {SIAM " 1934 "Journal on Matrix Analysis and Applications},\n volume = {23},\n number = {1},\n pages = {15--41},\n year = {2001}\n}\n", 1935 &cite1)); 1936 PetscCall(PetscCitationsRegister("@article{MUMPS02,\n author = {P.~R. Amestoy and A. Guermouche and J.-Y. L'Excellent and S. Pralet},\n title = {Hybrid scheduling for the parallel solution of linear systems},\n journal = {Parallel " 1937 "Computing},\n volume = {32},\n number = {2},\n pages = {136--156},\n year = {2006}\n}\n", 1938 &cite2)); 1939 1940 PetscCall(VecFlag(x, A->factorerrortype)); 1941 if (A->factorerrortype) { 1942 PetscCall(PetscInfo(A, "MatSolve is called with singular matrix factor, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 1943 PetscFunctionReturn(PETSC_SUCCESS); 1944 } 1945 1946 mumps->id.nrhs = 1; 1947 if (mumps->petsc_size > 1) { 1948 if (mumps->ICNTL20 == 10) { 1949 mumps->id.ICNTL(20) = 10; /* dense distributed RHS, need to set rhs_loc[], irhs_loc[] */ 1950 PetscCall(VecGetArrayRead(b, &barray)); 1951 PetscCall(MatMumpsSetUpDistRHSInfo(A, 1, barray)); 1952 } else { 1953 mumps->id.ICNTL(20) = 0; /* dense centralized RHS; Scatter b into a sequential b_seq vector*/ 1954 PetscCall(VecScatterBegin(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1955 PetscCall(VecScatterEnd(mumps->scat_rhs, b, mumps->b_seq, INSERT_VALUES, SCATTER_FORWARD)); 1956 if (!mumps->myid) { 1957 PetscCall(VecGetArray(mumps->b_seq, &array)); 1958 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->b_seq->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1959 } 1960 } 1961 } else { /* petsc_size == 1, use MUMPS's dense centralized RHS feature, so that we don't need to bother with isol_loc[] to get the solution */ 1962 mumps->id.ICNTL(20) = 0; 1963 PetscCall(VecCopy(b, x)); 1964 PetscCall(VecGetArray(x, &array)); 1965 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, x->map->n, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 1966 } 1967 1968 /* 1969 handle condensation step of Schur complement (if any) 1970 We set by default ICNTL(26) == -1 when Schur indices have been provided by the user. 1971 According to MUMPS (5.0.0) manual, any value should be harmful during the factorization phase 1972 Unless the user provides a valid value for ICNTL(26), MatSolve and MatMatSolve routines solve the full system. 1973 This requires an extra call to PetscMUMPS_c and the computation of the factors for S 1974 */ 1975 if (mumps->id.size_schur > 0) { 1976 PetscCheck(mumps->petsc_size <= 1, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 1977 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 1978 second_solve = PETSC_TRUE; 1979 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 1980 mumps->id.ICNTL(26) = 1; /* condensation phase */ 1981 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 1982 } 1983 1984 mumps->id.job = JOB_SOLVE; 1985 PetscMUMPS_c(mumps); // reduced solve, put solution in id.redrhs 1986 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 1987 1988 /* handle expansion step of Schur complement (if any) */ 1989 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 1990 else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 1991 PetscCall(MatMumpsSolveSchur_Private(A)); 1992 for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1] = ID_FIELD_GET(mumps->id, redrhs, i); 1993 } 1994 1995 if (mumps->petsc_size > 1) { /* convert mumps distributed solution to PETSc mpi x */ 1996 if (mumps->scat_sol && mumps->ICNTL9_pre != mumps->id.ICNTL(9)) { 1997 /* when id.ICNTL(9) changes, the contents of ilsol_loc may change (not its size, lsol_loc), recreates scat_sol */ 1998 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 1999 } 2000 if (!mumps->scat_sol) { /* create scatter scat_sol */ 2001 PetscInt *isol2_loc = NULL; 2002 PetscCall(ISCreateStride(PETSC_COMM_SELF, mumps->id.lsol_loc, 0, 1, &is_iden)); /* from */ 2003 PetscCall(PetscMalloc1(mumps->id.lsol_loc, &isol2_loc)); 2004 for (i = 0; i < mumps->id.lsol_loc; i++) isol2_loc[i] = mumps->id.isol_loc[i] - 1; /* change Fortran style to C style */ 2005 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, mumps->id.lsol_loc, isol2_loc, PETSC_OWN_POINTER, &is_petsc)); /* to */ 2006 PetscCall(VecScatterCreate(mumps->x_seq, is_iden, x, is_petsc, &mumps->scat_sol)); 2007 PetscCall(ISDestroy(&is_iden)); 2008 PetscCall(ISDestroy(&is_petsc)); 2009 mumps->ICNTL9_pre = mumps->id.ICNTL(9); /* save current value of id.ICNTL(9) */ 2010 } 2011 2012 PetscScalar *xarray; 2013 PetscCall(VecGetArray(mumps->x_seq, &xarray)); 2014 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.lsol_loc, mumps->id.precision, mumps->id.sol_loc, xarray)); 2015 PetscCall(VecRestoreArray(mumps->x_seq, &xarray)); 2016 PetscCall(VecScatterBegin(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2017 PetscCall(VecScatterEnd(mumps->scat_sol, mumps->x_seq, x, INSERT_VALUES, SCATTER_FORWARD)); 2018 2019 if (mumps->ICNTL20 == 10) { // distributed RHS 2020 PetscCall(VecRestoreArrayRead(b, &barray)); 2021 } else if (!mumps->myid) { // centralized RHS 2022 PetscCall(VecRestoreArray(mumps->b_seq, &array)); 2023 } 2024 } else { 2025 // id.rhs has the solution in mumps precision 2026 PetscCall(MatMumpsCastMumpsScalarArray(x->map->n, mumps->id.precision, mumps->id.rhs, array)); 2027 PetscCall(VecRestoreArray(x, &array)); 2028 } 2029 2030 PetscCall(PetscLogFlops(2.0 * PetscMax(0, (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatSolveTranspose_MUMPS(Mat A, Vec b, Vec x) 2035 { 2036 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2037 const PetscMUMPSInt value = mumps->id.ICNTL(9); 2038 2039 PetscFunctionBegin; 2040 mumps->id.ICNTL(9) = 0; 2041 PetscCall(MatSolve_MUMPS(A, b, x)); 2042 mumps->id.ICNTL(9) = value; 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 static PetscErrorCode MatMatSolve_MUMPS(Mat A, Mat B, Mat X) 2047 { 2048 Mat Bt = NULL; 2049 PetscBool denseX, denseB, flg, flgT; 2050 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2051 PetscInt i, nrhs, M, nrhsM; 2052 PetscScalar *array; 2053 const PetscScalar *barray; 2054 PetscInt lsol_loc, nlsol_loc, *idxx, iidx = 0; 2055 PetscMUMPSInt *isol_loc, *isol_loc_save; 2056 PetscScalar *sol_loc; 2057 void *sol_loc_save; 2058 PetscCount sol_loc_len_save; 2059 IS is_to, is_from; 2060 PetscInt k, proc, j, m, myrstart; 2061 const PetscInt *rstart; 2062 Vec v_mpi, msol_loc; 2063 VecScatter scat_sol; 2064 Vec b_seq; 2065 VecScatter scat_rhs; 2066 PetscScalar *aa; 2067 PetscInt spnr, *ia, *ja; 2068 Mat_MPIAIJ *b = NULL; 2069 2070 PetscFunctionBegin; 2071 PetscCall(PetscObjectTypeCompareAny((PetscObject)X, &denseX, MATSEQDENSE, MATMPIDENSE, NULL)); 2072 PetscCheck(denseX, PetscObjectComm((PetscObject)X), PETSC_ERR_ARG_WRONG, "Matrix X must be MATDENSE matrix"); 2073 2074 PetscCall(PetscObjectTypeCompareAny((PetscObject)B, &denseB, MATSEQDENSE, MATMPIDENSE, NULL)); 2075 2076 if (denseB) { 2077 PetscCheck(B->rmap->n == X->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Matrix B and X must have same row distribution"); 2078 mumps->id.ICNTL(20) = 0; /* dense RHS */ 2079 } else { /* sparse B */ 2080 PetscCheck(X != B, PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_IDN, "X and B must be different matrices"); 2081 PetscCall(PetscObjectTypeCompare((PetscObject)B, MATTRANSPOSEVIRTUAL, &flgT)); 2082 PetscCheck(flgT, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONG, "Matrix B must be MATTRANSPOSEVIRTUAL matrix"); 2083 PetscCall(MatShellGetScalingShifts(B, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 2084 /* input B is transpose of actual RHS matrix, 2085 because mumps requires sparse compressed COLUMN storage! See MatMatTransposeSolve_MUMPS() */ 2086 PetscCall(MatTransposeGetMat(B, &Bt)); 2087 mumps->id.ICNTL(20) = 1; /* sparse RHS */ 2088 } 2089 2090 PetscCall(MatGetSize(B, &M, &nrhs)); 2091 PetscCall(PetscIntMultError(nrhs, M, &nrhsM)); 2092 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 2093 mumps->id.lrhs = (PetscMUMPSInt)M; 2094 2095 if (mumps->petsc_size == 1) { // handle this easy case specially and return early 2096 PetscScalar *aa; 2097 PetscInt spnr, *ia, *ja; 2098 PetscBool second_solve = PETSC_FALSE; 2099 2100 PetscCall(MatDenseGetArray(X, &array)); 2101 if (denseB) { 2102 /* copy B to X */ 2103 PetscCall(MatDenseGetArrayRead(B, &barray)); 2104 PetscCall(PetscArraycpy(array, barray, nrhsM)); 2105 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2106 } else { /* sparse B */ 2107 PetscCall(MatSeqAIJGetArray(Bt, &aa)); 2108 PetscCall(MatGetRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2109 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2110 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2111 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->id.nz_rhs, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2112 } 2113 PetscCall(MatMumpsMakeMumpsScalarArray(denseB, nrhsM, array, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2114 2115 /* handle condensation step of Schur complement (if any) */ 2116 if (mumps->id.size_schur > 0) { 2117 if (mumps->id.ICNTL(26) < 0 || mumps->id.ICNTL(26) > 2) { 2118 second_solve = PETSC_TRUE; 2119 PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); // allocate id.redrhs 2120 mumps->id.ICNTL(26) = 1; /* condensation phase, i.e, to solve id.redrhs */ 2121 } else if (mumps->id.ICNTL(26) == 1) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_FALSE)); 2122 } 2123 2124 mumps->id.job = JOB_SOLVE; 2125 PetscMUMPS_c(mumps); 2126 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2127 2128 /* handle expansion step of Schur complement (if any) */ 2129 if (second_solve) PetscCall(MatMumpsHandleSchur_Private(A, PETSC_TRUE)); 2130 else if (mumps->id.ICNTL(26) == 1) { // condense the right hand side 2131 PetscCall(MatMumpsSolveSchur_Private(A)); 2132 for (j = 0; j < nrhs; ++j) 2133 for (i = 0; i < mumps->id.size_schur; ++i) array[mumps->id.listvar_schur[i] - 1 + j * M] = ID_FIELD_GET(mumps->id, redrhs, i + j * mumps->id.lredrhs); 2134 } 2135 2136 if (!denseB) { /* sparse B, restore ia, ja */ 2137 PetscCall(MatSeqAIJRestoreArray(Bt, &aa)); 2138 PetscCall(MatRestoreRowIJ(Bt, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2139 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2140 } 2141 2142 // no matter dense B or sparse B, solution is in id.rhs; convert it to array of X. 2143 PetscCall(MatMumpsCastMumpsScalarArray(nrhsM, mumps->id.precision, mumps->id.rhs, array)); 2144 PetscCall(MatDenseRestoreArray(X, &array)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 /* parallel case: MUMPS requires rhs B to be centralized on the host! */ 2149 PetscCheck(!mumps->id.ICNTL(19), PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Parallel Schur complements not yet supported from PETSc"); 2150 2151 /* create msol_loc to hold mumps local solution */ 2152 isol_loc_save = mumps->id.isol_loc; /* save these, as we want to reuse them in MatSolve() */ 2153 sol_loc_save = mumps->id.sol_loc; 2154 sol_loc_len_save = mumps->id.sol_loc_len; 2155 mumps->id.isol_loc = NULL; // an init state 2156 mumps->id.sol_loc = NULL; 2157 mumps->id.sol_loc_len = 0; 2158 2159 lsol_loc = mumps->id.lsol_loc; 2160 PetscCall(PetscIntMultError(nrhs, lsol_loc, &nlsol_loc)); /* length of sol_loc */ 2161 PetscCall(PetscMalloc2(nlsol_loc, &sol_loc, lsol_loc, &isol_loc)); 2162 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, nlsol_loc, sol_loc, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2163 mumps->id.isol_loc = isol_loc; 2164 2165 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, nlsol_loc, (PetscScalar *)sol_loc, &msol_loc)); 2166 2167 if (denseB) { 2168 if (mumps->ICNTL20 == 10) { 2169 mumps->id.ICNTL(20) = 10; /* dense distributed RHS */ 2170 PetscCall(MatDenseGetArrayRead(B, &barray)); 2171 PetscCall(MatMumpsSetUpDistRHSInfo(A, nrhs, barray)); // put barray to rhs_loc 2172 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2173 PetscCall(MatGetLocalSize(B, &m, NULL)); 2174 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, NULL, &v_mpi)); // will scatter the solution to v_mpi, which wraps X 2175 } else { 2176 mumps->id.ICNTL(20) = 0; /* dense centralized RHS */ 2177 /* TODO: Because of non-contiguous indices, the created vecscatter scat_rhs is not done in MPI_Gather, resulting in 2178 very inefficient communication. An optimization is to use VecScatterCreateToZero to gather B to rank 0. Then on rank 2179 0, re-arrange B into desired order, which is a local operation. 2180 */ 2181 2182 /* scatter v_mpi to b_seq because MUMPS before 5.3.0 only supports centralized rhs */ 2183 /* wrap dense rhs matrix B into a vector v_mpi */ 2184 PetscCall(MatGetLocalSize(B, &m, NULL)); 2185 PetscCall(MatDenseGetArrayRead(B, &barray)); 2186 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2187 PetscCall(MatDenseRestoreArrayRead(B, &barray)); 2188 2189 /* scatter v_mpi to b_seq in proc[0]. With ICNTL(20) = 0, MUMPS requires rhs to be centralized on the host! */ 2190 if (!mumps->myid) { 2191 PetscInt *idx; 2192 /* idx: maps from k-th index of v_mpi to (i,j)-th global entry of B */ 2193 PetscCall(PetscMalloc1(nrhsM, &idx)); 2194 PetscCall(MatGetOwnershipRanges(B, &rstart)); 2195 for (proc = 0, k = 0; proc < mumps->petsc_size; proc++) { 2196 for (j = 0; j < nrhs; j++) { 2197 for (i = rstart[proc]; i < rstart[proc + 1]; i++) idx[k++] = j * M + i; 2198 } 2199 } 2200 2201 PetscCall(VecCreateSeq(PETSC_COMM_SELF, nrhsM, &b_seq)); 2202 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nrhsM, idx, PETSC_OWN_POINTER, &is_to)); 2203 PetscCall(ISCreateStride(PETSC_COMM_SELF, nrhsM, 0, 1, &is_from)); 2204 } else { 2205 PetscCall(VecCreateSeq(PETSC_COMM_SELF, 0, &b_seq)); 2206 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_to)); 2207 PetscCall(ISCreateStride(PETSC_COMM_SELF, 0, 0, 1, &is_from)); 2208 } 2209 2210 PetscCall(VecScatterCreate(v_mpi, is_from, b_seq, is_to, &scat_rhs)); 2211 PetscCall(VecScatterBegin(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2212 PetscCall(ISDestroy(&is_to)); 2213 PetscCall(ISDestroy(&is_from)); 2214 PetscCall(VecScatterEnd(scat_rhs, v_mpi, b_seq, INSERT_VALUES, SCATTER_FORWARD)); 2215 2216 if (!mumps->myid) { /* define rhs on the host */ 2217 PetscCall(VecGetArrayRead(b_seq, &barray)); 2218 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, nrhsM, barray, mumps->id.precision, &mumps->id.rhs_len, &mumps->id.rhs)); 2219 PetscCall(VecRestoreArrayRead(b_seq, &barray)); 2220 } 2221 } 2222 } else { /* sparse B */ 2223 b = (Mat_MPIAIJ *)Bt->data; 2224 2225 /* wrap dense X into a vector v_mpi */ 2226 PetscCall(MatGetLocalSize(X, &m, NULL)); 2227 PetscCall(MatDenseGetArrayRead(X, &barray)); 2228 PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)X), 1, nrhs * m, nrhsM, barray, &v_mpi)); 2229 PetscCall(MatDenseRestoreArrayRead(X, &barray)); 2230 2231 if (!mumps->myid) { 2232 PetscCall(MatSeqAIJGetArray(b->A, &aa)); 2233 PetscCall(MatGetRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2234 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 2235 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 2236 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)b->A->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 2237 } else { 2238 mumps->id.irhs_ptr = NULL; 2239 mumps->id.irhs_sparse = NULL; 2240 mumps->id.nz_rhs = 0; 2241 if (mumps->id.rhs_sparse_len) { 2242 PetscCall(PetscFree(mumps->id.rhs_sparse)); 2243 mumps->id.rhs_sparse_len = 0; 2244 } 2245 } 2246 } 2247 2248 /* solve phase */ 2249 mumps->id.job = JOB_SOLVE; 2250 PetscMUMPS_c(mumps); 2251 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2252 2253 /* scatter mumps distributed solution to PETSc vector v_mpi, which shares local arrays with solution matrix X */ 2254 PetscCall(MatDenseGetArray(X, &array)); 2255 PetscCall(VecPlaceArray(v_mpi, array)); 2256 2257 /* create scatter scat_sol */ 2258 PetscCall(MatGetOwnershipRanges(X, &rstart)); 2259 /* iidx: index for scatter mumps solution to PETSc X */ 2260 2261 PetscCall(ISCreateStride(PETSC_COMM_SELF, nlsol_loc, 0, 1, &is_from)); 2262 PetscCall(PetscMalloc1(nlsol_loc, &idxx)); 2263 for (i = 0; i < lsol_loc; i++) { 2264 isol_loc[i] -= 1; /* change Fortran style to C style. isol_loc[i+j*lsol_loc] contains x[isol_loc[i]] in j-th vector */ 2265 2266 for (proc = 0; proc < mumps->petsc_size; proc++) { 2267 if (isol_loc[i] >= rstart[proc] && isol_loc[i] < rstart[proc + 1]) { 2268 myrstart = rstart[proc]; 2269 k = isol_loc[i] - myrstart; /* local index on 1st column of PETSc vector X */ 2270 iidx = k + myrstart * nrhs; /* maps mumps isol_loc[i] to PETSc index in X */ 2271 m = rstart[proc + 1] - rstart[proc]; /* rows of X for this proc */ 2272 break; 2273 } 2274 } 2275 2276 for (j = 0; j < nrhs; j++) idxx[i + j * lsol_loc] = iidx + j * m; 2277 } 2278 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nlsol_loc, idxx, PETSC_COPY_VALUES, &is_to)); 2279 PetscCall(MatMumpsCastMumpsScalarArray(nlsol_loc, mumps->id.precision, mumps->id.sol_loc, sol_loc)); // Vec msol_loc is created with sol_loc[] 2280 PetscCall(VecScatterCreate(msol_loc, is_from, v_mpi, is_to, &scat_sol)); 2281 PetscCall(VecScatterBegin(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 2282 PetscCall(ISDestroy(&is_from)); 2283 PetscCall(ISDestroy(&is_to)); 2284 PetscCall(VecScatterEnd(scat_sol, msol_loc, v_mpi, INSERT_VALUES, SCATTER_FORWARD)); 2285 PetscCall(MatDenseRestoreArray(X, &array)); 2286 2287 if (mumps->id.sol_loc_len) { // in case we allocated intermediate buffers 2288 mumps->id.sol_loc_len = 0; 2289 PetscCall(PetscFree(mumps->id.sol_loc)); 2290 } 2291 2292 // restore old values 2293 mumps->id.sol_loc = sol_loc_save; 2294 mumps->id.sol_loc_len = sol_loc_len_save; 2295 mumps->id.isol_loc = isol_loc_save; 2296 2297 PetscCall(PetscFree2(sol_loc, isol_loc)); 2298 PetscCall(PetscFree(idxx)); 2299 PetscCall(VecDestroy(&msol_loc)); 2300 PetscCall(VecDestroy(&v_mpi)); 2301 if (!denseB) { 2302 if (!mumps->myid) { 2303 b = (Mat_MPIAIJ *)Bt->data; 2304 PetscCall(MatSeqAIJRestoreArray(b->A, &aa)); 2305 PetscCall(MatRestoreRowIJ(b->A, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 2306 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot restore IJ structure"); 2307 } 2308 } else { 2309 if (mumps->ICNTL20 == 0) { 2310 PetscCall(VecDestroy(&b_seq)); 2311 PetscCall(VecScatterDestroy(&scat_rhs)); 2312 } 2313 } 2314 PetscCall(VecScatterDestroy(&scat_sol)); 2315 PetscCall(PetscLogFlops(nrhs * PetscMax(0, 2.0 * (mumps->id.INFO(28) >= 0 ? mumps->id.INFO(28) : -1000000 * mumps->id.INFO(28)) - A->cmap->n))); 2316 PetscFunctionReturn(PETSC_SUCCESS); 2317 } 2318 2319 static PetscErrorCode MatMatSolveTranspose_MUMPS(Mat A, Mat B, Mat X) 2320 { 2321 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 2322 const PetscMUMPSInt value = mumps->id.ICNTL(9); 2323 2324 PetscFunctionBegin; 2325 mumps->id.ICNTL(9) = 0; 2326 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2327 mumps->id.ICNTL(9) = value; 2328 PetscFunctionReturn(PETSC_SUCCESS); 2329 } 2330 2331 static PetscErrorCode MatMatTransposeSolve_MUMPS(Mat A, Mat Bt, Mat X) 2332 { 2333 PetscBool flg; 2334 Mat B; 2335 2336 PetscFunctionBegin; 2337 PetscCall(PetscObjectTypeCompareAny((PetscObject)Bt, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 2338 PetscCheck(flg, PetscObjectComm((PetscObject)Bt), PETSC_ERR_ARG_WRONG, "Matrix Bt must be MATAIJ matrix"); 2339 2340 /* Create B=Bt^T that uses Bt's data structure */ 2341 PetscCall(MatCreateTranspose(Bt, &B)); 2342 2343 PetscCall(MatMatSolve_MUMPS(A, B, X)); 2344 PetscCall(MatDestroy(&B)); 2345 PetscFunctionReturn(PETSC_SUCCESS); 2346 } 2347 2348 #if !defined(PETSC_USE_COMPLEX) 2349 /* 2350 input: 2351 F: numeric factor 2352 output: 2353 nneg: total number of negative pivots 2354 nzero: total number of zero pivots 2355 npos: (global dimension of F) - nneg - nzero 2356 */ 2357 static PetscErrorCode MatGetInertia_SBAIJMUMPS(Mat F, PetscInt *nneg, PetscInt *nzero, PetscInt *npos) 2358 { 2359 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2360 PetscMPIInt size; 2361 2362 PetscFunctionBegin; 2363 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)F), &size)); 2364 /* MUMPS 4.3.1 calls ScaLAPACK when ICNTL(13)=0 (default), which does not offer the possibility to compute the inertia of a dense matrix. Set ICNTL(13)=1 to skip ScaLAPACK */ 2365 PetscCheck(size <= 1 || mumps->id.ICNTL(13) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "ICNTL(13)=%d. -mat_mumps_icntl_13 must be set as 1 for correct global matrix inertia", mumps->id.INFOG(13)); 2366 2367 if (nneg) *nneg = mumps->id.INFOG(12); 2368 if (nzero || npos) { 2369 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 2370 if (nzero) *nzero = mumps->id.INFOG(28); 2371 if (npos) *npos = F->rmap->N - (mumps->id.INFOG(12) + mumps->id.INFOG(28)); 2372 } 2373 PetscFunctionReturn(PETSC_SUCCESS); 2374 } 2375 #endif 2376 2377 static PetscErrorCode MatMumpsGatherNonzerosOnMaster(MatReuse reuse, Mat_MUMPS *mumps) 2378 { 2379 PetscMPIInt nreqs; 2380 PetscMUMPSInt *irn, *jcn; 2381 PetscMPIInt count; 2382 PetscCount totnnz, remain; 2383 const PetscInt osize = mumps->omp_comm_size; 2384 PetscScalar *val; 2385 2386 PetscFunctionBegin; 2387 if (osize > 1) { 2388 if (reuse == MAT_INITIAL_MATRIX) { 2389 /* master first gathers counts of nonzeros to receive */ 2390 if (mumps->is_omp_master) PetscCall(PetscMalloc1(osize, &mumps->recvcount)); 2391 PetscCallMPI(MPI_Gather(&mumps->nnz, 1, MPIU_INT64, mumps->recvcount, 1, MPIU_INT64, 0 /*master*/, mumps->omp_comm)); 2392 2393 /* Then each computes number of send/recvs */ 2394 if (mumps->is_omp_master) { 2395 /* Start from 1 since self communication is not done in MPI */ 2396 nreqs = 0; 2397 for (PetscMPIInt i = 1; i < osize; i++) nreqs += (mumps->recvcount[i] + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX; 2398 } else { 2399 nreqs = (PetscMPIInt)(((mumps->nnz + PETSC_MPI_INT_MAX - 1) / PETSC_MPI_INT_MAX)); 2400 } 2401 PetscCall(PetscMalloc1(nreqs * 3, &mumps->reqs)); /* Triple the requests since we send irn, jcn and val separately */ 2402 2403 /* The following code is doing a very simple thing: omp_master rank gathers irn/jcn/val from others. 2404 MPI_Gatherv would be enough if it supports big counts > 2^31-1. Since it does not, and mumps->nnz 2405 might be a prime number > 2^31-1, we have to slice the message. Note omp_comm_size 2406 is very small, the current approach should have no extra overhead compared to MPI_Gatherv. 2407 */ 2408 nreqs = 0; /* counter for actual send/recvs */ 2409 if (mumps->is_omp_master) { 2410 totnnz = 0; 2411 2412 for (PetscMPIInt i = 0; i < osize; i++) totnnz += mumps->recvcount[i]; /* totnnz = sum of nnz over omp_comm */ 2413 PetscCall(PetscMalloc2(totnnz, &irn, totnnz, &jcn)); 2414 PetscCall(PetscMalloc1(totnnz, &val)); 2415 2416 /* Self communication */ 2417 PetscCall(PetscArraycpy(irn, mumps->irn, mumps->nnz)); 2418 PetscCall(PetscArraycpy(jcn, mumps->jcn, mumps->nnz)); 2419 PetscCall(PetscArraycpy(val, mumps->val, mumps->nnz)); 2420 2421 /* Replace mumps->irn/jcn etc on master with the newly allocated bigger arrays */ 2422 PetscCall(PetscFree2(mumps->irn, mumps->jcn)); 2423 PetscCall(PetscFree(mumps->val_alloc)); 2424 mumps->nnz = totnnz; 2425 mumps->irn = irn; 2426 mumps->jcn = jcn; 2427 mumps->val = mumps->val_alloc = val; 2428 2429 irn += mumps->recvcount[0]; /* recvcount[0] is old mumps->nnz on omp rank 0 */ 2430 jcn += mumps->recvcount[0]; 2431 val += mumps->recvcount[0]; 2432 2433 /* Remote communication */ 2434 for (PetscMPIInt i = 1; i < osize; i++) { 2435 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2436 remain = mumps->recvcount[i] - count; 2437 while (count > 0) { 2438 PetscCallMPI(MPIU_Irecv(irn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2439 PetscCallMPI(MPIU_Irecv(jcn, count, MPIU_MUMPSINT, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2440 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2441 irn += count; 2442 jcn += count; 2443 val += count; 2444 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2445 remain -= count; 2446 } 2447 } 2448 } else { 2449 irn = mumps->irn; 2450 jcn = mumps->jcn; 2451 val = mumps->val; 2452 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2453 remain = mumps->nnz - count; 2454 while (count > 0) { 2455 PetscCallMPI(MPIU_Isend(irn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2456 PetscCallMPI(MPIU_Isend(jcn, count, MPIU_MUMPSINT, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2457 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2458 irn += count; 2459 jcn += count; 2460 val += count; 2461 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2462 remain -= count; 2463 } 2464 } 2465 } else { 2466 nreqs = 0; 2467 if (mumps->is_omp_master) { 2468 val = mumps->val + mumps->recvcount[0]; 2469 for (PetscMPIInt i = 1; i < osize; i++) { /* Remote communication only since self data is already in place */ 2470 count = (PetscMPIInt)PetscMin(mumps->recvcount[i], (PetscMPIInt)PETSC_MPI_INT_MAX); 2471 remain = mumps->recvcount[i] - count; 2472 while (count > 0) { 2473 PetscCallMPI(MPIU_Irecv(val, count, MPIU_SCALAR, i, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2474 val += count; 2475 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2476 remain -= count; 2477 } 2478 } 2479 } else { 2480 val = mumps->val; 2481 count = (PetscMPIInt)PetscMin(mumps->nnz, (PetscMPIInt)PETSC_MPI_INT_MAX); 2482 remain = mumps->nnz - count; 2483 while (count > 0) { 2484 PetscCallMPI(MPIU_Isend(val, count, MPIU_SCALAR, 0, mumps->tag, mumps->omp_comm, &mumps->reqs[nreqs++])); 2485 val += count; 2486 count = (PetscMPIInt)PetscMin(remain, (PetscMPIInt)PETSC_MPI_INT_MAX); 2487 remain -= count; 2488 } 2489 } 2490 } 2491 PetscCallMPI(MPI_Waitall(nreqs, mumps->reqs, MPI_STATUSES_IGNORE)); 2492 mumps->tag++; /* It is totally fine for above send/recvs to share one mpi tag */ 2493 } 2494 PetscFunctionReturn(PETSC_SUCCESS); 2495 } 2496 2497 static PetscErrorCode MatFactorNumeric_MUMPS(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info) 2498 { 2499 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2500 2501 PetscFunctionBegin; 2502 if (mumps->id.INFOG(1) < 0 && !(mumps->id.INFOG(1) == -16 && mumps->id.INFOG(1) == 0)) { 2503 if (mumps->id.INFOG(1) == -6) PetscCall(PetscInfo(A, "MatFactorNumeric is called with singular matrix structure, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2504 PetscCall(PetscInfo(A, "MatFactorNumeric is called after analysis phase fails, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2505 PetscFunctionReturn(PETSC_SUCCESS); 2506 } 2507 2508 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_REUSE_MATRIX, mumps)); 2509 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_REUSE_MATRIX, mumps)); 2510 2511 /* numerical factorization phase */ 2512 mumps->id.job = JOB_FACTNUMERIC; 2513 if (!mumps->id.ICNTL(18)) { /* A is centralized */ 2514 if (!mumps->myid) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2515 } else { 2516 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2517 } 2518 2519 if (F->schur) { 2520 const PetscScalar *array; 2521 MUMPS_INT size = mumps->id.size_schur; 2522 PetscCall(MatDenseGetArrayRead(F->schur, &array)); 2523 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, size * size, array, mumps->id.precision, &mumps->id.schur_len, &mumps->id.schur)); 2524 PetscCall(MatDenseRestoreArrayRead(F->schur, &array)); 2525 } 2526 2527 PetscMUMPS_c(mumps); 2528 if (mumps->id.INFOG(1) < 0) { 2529 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 2530 if (mumps->id.INFOG(1) == -10) { 2531 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: matrix is numerically singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2532 F->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT; 2533 } else if (mumps->id.INFOG(1) == -13) { 2534 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, cannot allocate required memory %d megabytes\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2535 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2536 } else if (mumps->id.INFOG(1) == -8 || mumps->id.INFOG(1) == -9 || (-16 < mumps->id.INFOG(1) && mumps->id.INFOG(1) < -10)) { 2537 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d, problem with work array\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2538 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2539 } else { 2540 PetscCall(PetscInfo(F, "MUMPS error in numerical factorization: INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2541 F->factorerrortype = MAT_FACTOR_OTHER; 2542 } 2543 } 2544 PetscCheck(mumps->myid || mumps->id.ICNTL(16) <= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in numerical factorization: ICNTL(16)=%d " MUMPS_MANUALS, mumps->id.INFOG(16)); 2545 2546 F->assembled = PETSC_TRUE; 2547 2548 if (F->schur) { /* reset Schur status to unfactored */ 2549 #if defined(PETSC_HAVE_CUDA) 2550 F->schur->offloadmask = PETSC_OFFLOAD_CPU; 2551 #endif 2552 PetscScalar *array; 2553 PetscCall(MatDenseGetArray(F->schur, &array)); 2554 PetscCall(MatMumpsCastMumpsScalarArray(mumps->id.size_schur * mumps->id.size_schur, mumps->id.precision, mumps->id.schur, array)); 2555 PetscCall(MatDenseRestoreArray(F->schur, &array)); 2556 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 2557 mumps->id.ICNTL(19) = 2; 2558 PetscCall(MatTranspose(F->schur, MAT_INPLACE_MATRIX, &F->schur)); 2559 } 2560 PetscCall(MatFactorRestoreSchurComplement(F, NULL, MAT_FACTOR_SCHUR_UNFACTORED)); 2561 } 2562 2563 /* just to be sure that ICNTL(19) value returned by a call from MatMumpsGetIcntl is always consistent */ 2564 if (!mumps->sym && mumps->id.ICNTL(19) && mumps->id.ICNTL(19) != 1) mumps->id.ICNTL(19) = 3; 2565 2566 if (!mumps->is_omp_master) mumps->id.INFO(23) = 0; 2567 // MUMPS userguide: ISOL_loc should be allocated by the user between the factorization and the 2568 // solve phases. On exit from the solve phase, ISOL_loc(i) contains the index of the variables for 2569 // which the solution (in SOL_loc) is available on the local processor. 2570 // If successive calls to the solve phase (JOB= 3) are performed for a given matrix, ISOL_loc will 2571 // normally have the same contents for each of these calls. The only exception is the case of 2572 // unsymmetric matrices (SYM=1) when the transpose option is changed (see ICNTL(9)) and non 2573 // symmetric row/column exchanges (see ICNTL(6)) have occurred before the solve phase. 2574 if (mumps->petsc_size > 1) { 2575 PetscInt lsol_loc; 2576 PetscScalar *array; 2577 2578 /* distributed solution; Create x_seq=sol_loc for repeated use */ 2579 if (mumps->x_seq) { 2580 PetscCall(VecScatterDestroy(&mumps->scat_sol)); 2581 PetscCall(PetscFree(mumps->id.isol_loc)); 2582 PetscCall(VecDestroy(&mumps->x_seq)); 2583 } 2584 lsol_loc = mumps->id.INFO(23); /* length of sol_loc */ 2585 PetscCall(PetscMalloc1(lsol_loc, &mumps->id.isol_loc)); 2586 PetscCall(VecCreateSeq(PETSC_COMM_SELF, lsol_loc, &mumps->x_seq)); 2587 PetscCall(VecGetArray(mumps->x_seq, &array)); 2588 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_FALSE, lsol_loc, array, mumps->id.precision, &mumps->id.sol_loc_len, &mumps->id.sol_loc)); 2589 PetscCall(VecRestoreArray(mumps->x_seq, &array)); 2590 mumps->id.lsol_loc = (PetscMUMPSInt)lsol_loc; 2591 } 2592 PetscCall(PetscLogFlops((double)ID_RINFO_GET(mumps->id, 2))); 2593 PetscFunctionReturn(PETSC_SUCCESS); 2594 } 2595 2596 /* Sets MUMPS options from the options database */ 2597 static PetscErrorCode MatSetFromOptions_MUMPS(Mat F, Mat A) 2598 { 2599 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2600 PetscReal cntl; 2601 PetscMUMPSInt icntl = 0, size, *listvar_schur; 2602 PetscInt info[80], i, ninfo = 80, rbs, cbs; 2603 PetscBool flg = PETSC_FALSE; 2604 PetscBool schur = mumps->id.icntl ? (PetscBool)(mumps->id.ICNTL(26) == -1) : (PetscBool)(mumps->ICNTL26 == -1); 2605 void *arr; 2606 2607 PetscFunctionBegin; 2608 PetscOptionsBegin(PetscObjectComm((PetscObject)F), ((PetscObject)F)->prefix, "MUMPS Options", "Mat"); 2609 if (mumps->id.job == JOB_NULL) { /* MatSetFromOptions_MUMPS() has never been called before */ 2610 PetscPrecision precision = PetscDefined(USE_REAL_SINGLE) ? PETSC_PRECISION_SINGLE : PETSC_PRECISION_DOUBLE; 2611 PetscInt nthreads = 0; 2612 PetscInt nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 2613 PetscInt nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 2614 PetscMUMPSInt nblk, *blkvar, *blkptr; 2615 2616 mumps->petsc_comm = PetscObjectComm((PetscObject)A); 2617 PetscCallMPI(MPI_Comm_size(mumps->petsc_comm, &mumps->petsc_size)); 2618 PetscCallMPI(MPI_Comm_rank(mumps->petsc_comm, &mumps->myid)); /* "if (!myid)" still works even if mumps_comm is different */ 2619 2620 PetscCall(PetscOptionsName("-mat_mumps_use_omp_threads", "Convert MPI processes into OpenMP threads", "None", &mumps->use_petsc_omp_support)); 2621 if (mumps->use_petsc_omp_support) nthreads = -1; /* -1 will let PetscOmpCtrlCreate() guess a proper value when user did not supply one */ 2622 /* do not use PetscOptionsInt() so that the option -mat_mumps_use_omp_threads is not displayed twice in the help */ 2623 PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)F)->prefix, "-mat_mumps_use_omp_threads", &nthreads, NULL)); 2624 if (mumps->use_petsc_omp_support) { 2625 PetscCheck(!schur, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use -%smat_mumps_use_omp_threads with the Schur complement feature", ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2626 #if defined(PETSC_HAVE_OPENMP_SUPPORT) 2627 PetscCall(PetscOmpCtrlCreate(mumps->petsc_comm, nthreads, &mumps->omp_ctrl)); 2628 PetscCall(PetscOmpCtrlGetOmpComms(mumps->omp_ctrl, &mumps->omp_comm, &mumps->mumps_comm, &mumps->is_omp_master)); 2629 #else 2630 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "The system does not have PETSc OpenMP support but you added the -%smat_mumps_use_omp_threads option. Configure PETSc with --with-openmp --download-hwloc (or --with-hwloc) to enable it, see more in MATSOLVERMUMPS manual", 2631 ((PetscObject)F)->prefix ? ((PetscObject)F)->prefix : ""); 2632 #endif 2633 } else { 2634 mumps->omp_comm = PETSC_COMM_SELF; 2635 mumps->mumps_comm = mumps->petsc_comm; 2636 mumps->is_omp_master = PETSC_TRUE; 2637 } 2638 PetscCallMPI(MPI_Comm_size(mumps->omp_comm, &mumps->omp_comm_size)); 2639 mumps->reqs = NULL; 2640 mumps->tag = 0; 2641 2642 if (mumps->mumps_comm != MPI_COMM_NULL) { 2643 if (PetscDefined(HAVE_OPENMP_SUPPORT) && mumps->use_petsc_omp_support) { 2644 /* It looks like MUMPS does not dup the input comm. Dup a new comm for MUMPS to avoid any tag mismatches. */ 2645 MPI_Comm comm; 2646 PetscCallMPI(MPI_Comm_dup(mumps->mumps_comm, &comm)); 2647 mumps->mumps_comm = comm; 2648 } else PetscCall(PetscCommGetComm(mumps->petsc_comm, &mumps->mumps_comm)); 2649 } 2650 2651 mumps->id.comm_fortran = MPI_Comm_c2f(mumps->mumps_comm); 2652 mumps->id.job = JOB_INIT; 2653 mumps->id.par = 1; /* host participates factorizaton and solve */ 2654 mumps->id.sym = mumps->sym; 2655 2656 size = mumps->id.size_schur; 2657 arr = mumps->id.schur; 2658 listvar_schur = mumps->id.listvar_schur; 2659 nblk = mumps->id.nblk; 2660 blkvar = mumps->id.blkvar; 2661 blkptr = mumps->id.blkptr; 2662 if (PetscDefined(USE_DEBUG)) { 2663 for (PetscInt i = 0; i < size; i++) 2664 PetscCheck(listvar_schur[i] - 1 >= 0 && listvar_schur[i] - 1 < A->rmap->N, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid Schur index at position %" PetscInt_FMT "! %" PetscInt_FMT " must be in [0, %" PetscInt_FMT ")", i, (PetscInt)listvar_schur[i] - 1, 2665 A->rmap->N); 2666 } 2667 2668 PetscCall(PetscOptionsEnum("-pc_precision", "Precision used by MUMPS", "MATSOLVERMUMPS", PetscPrecisionTypes, (PetscEnum)precision, (PetscEnum *)&precision, NULL)); 2669 PetscCheck(precision == PETSC_PRECISION_SINGLE || precision == PETSC_PRECISION_DOUBLE, PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "MUMPS does not support %s precision", PetscPrecisionTypes[precision]); 2670 PetscCheck(precision == PETSC_SCALAR_PRECISION || PetscDefined(HAVE_MUMPS_MIXED_PRECISION), PetscObjectComm((PetscObject)F), PETSC_ERR_USER, "Your MUMPS library does not support mixed precision, but which is needed with your specified PetscScalar"); 2671 PetscCall(MatMumpsAllocateInternalID(&mumps->id, precision)); 2672 2673 PetscMUMPS_c(mumps); 2674 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2675 2676 /* set PETSc-MUMPS default options - override MUMPS default */ 2677 mumps->id.ICNTL(3) = 0; 2678 mumps->id.ICNTL(4) = 0; 2679 if (mumps->petsc_size == 1) { 2680 mumps->id.ICNTL(18) = 0; /* centralized assembled matrix input */ 2681 mumps->id.ICNTL(7) = 7; /* automatic choice of ordering done by the package */ 2682 } else { 2683 mumps->id.ICNTL(18) = 3; /* distributed assembled matrix input */ 2684 mumps->id.ICNTL(21) = 1; /* distributed solution */ 2685 } 2686 if (nblk && blkptr) { 2687 mumps->id.ICNTL(15) = 1; 2688 mumps->id.nblk = nblk; 2689 mumps->id.blkvar = blkvar; 2690 mumps->id.blkptr = blkptr; 2691 } else mumps->id.ICNTL(15) = 0; 2692 2693 /* restore cached ICNTL and CNTL values */ 2694 for (icntl = 0; icntl < nICNTL_pre; ++icntl) mumps->id.ICNTL(mumps->ICNTL_pre[1 + 2 * icntl]) = mumps->ICNTL_pre[2 + 2 * icntl]; 2695 for (icntl = 0; icntl < nCNTL_pre; ++icntl) ID_CNTL_SET(mumps->id, (PetscInt)mumps->CNTL_pre[1 + 2 * icntl], mumps->CNTL_pre[2 + 2 * icntl]); 2696 2697 PetscCall(PetscFree(mumps->ICNTL_pre)); 2698 PetscCall(PetscFree(mumps->CNTL_pre)); 2699 2700 if (schur) { 2701 mumps->id.size_schur = size; 2702 mumps->id.schur_lld = size; 2703 mumps->id.schur = arr; 2704 mumps->id.listvar_schur = listvar_schur; 2705 if (mumps->petsc_size > 1) { 2706 PetscBool gs; /* gs is false if any rank other than root has non-empty IS */ 2707 2708 mumps->id.ICNTL(19) = 1; /* MUMPS returns Schur centralized on the host */ 2709 gs = mumps->myid ? (mumps->id.size_schur ? PETSC_FALSE : PETSC_TRUE) : PETSC_TRUE; /* always true on root; false on others if their size != 0 */ 2710 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gs, 1, MPI_C_BOOL, MPI_LAND, mumps->petsc_comm)); 2711 PetscCheck(gs, PETSC_COMM_SELF, PETSC_ERR_SUP, "MUMPS distributed parallel Schur complements not yet supported from PETSc"); 2712 } else { 2713 if (F->factortype == MAT_FACTOR_LU) { 2714 mumps->id.ICNTL(19) = 3; /* MUMPS returns full matrix */ 2715 } else { 2716 mumps->id.ICNTL(19) = 2; /* MUMPS returns lower triangular part */ 2717 } 2718 } 2719 mumps->id.ICNTL(26) = -1; 2720 } 2721 2722 /* copy MUMPS default control values from master to slaves. Although slaves do not call MUMPS, they may access these values in code. 2723 For example, ICNTL(9) is initialized to 1 by MUMPS and slaves check ICNTL(9) in MatSolve_MUMPS. 2724 */ 2725 PetscCallMPI(MPI_Bcast(mumps->id.icntl, 40, MPI_INT, 0, mumps->omp_comm)); 2726 PetscCallMPI(MPI_Bcast(mumps->id.cntl, 15, MPIU_MUMPSREAL(&mumps->id), 0, mumps->omp_comm)); 2727 2728 mumps->scat_rhs = NULL; 2729 mumps->scat_sol = NULL; 2730 } 2731 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_1", "ICNTL(1): output stream for error messages", "None", mumps->id.ICNTL(1), &icntl, &flg)); 2732 if (flg) mumps->id.ICNTL(1) = icntl; 2733 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_2", "ICNTL(2): output stream for diagnostic printing, statistics, and warning", "None", mumps->id.ICNTL(2), &icntl, &flg)); 2734 if (flg) mumps->id.ICNTL(2) = icntl; 2735 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_3", "ICNTL(3): output stream for global information, collected on the host", "None", mumps->id.ICNTL(3), &icntl, &flg)); 2736 if (flg) mumps->id.ICNTL(3) = icntl; 2737 2738 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_4", "ICNTL(4): level of printing (0 to 4)", "None", mumps->id.ICNTL(4), &icntl, &flg)); 2739 if (flg) mumps->id.ICNTL(4) = icntl; 2740 if (mumps->id.ICNTL(4) || PetscLogPrintInfo) mumps->id.ICNTL(3) = 6; /* resume MUMPS default id.ICNTL(3) = 6 */ 2741 2742 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_6", "ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7)", "None", mumps->id.ICNTL(6), &icntl, &flg)); 2743 if (flg) mumps->id.ICNTL(6) = icntl; 2744 2745 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_7", "ICNTL(7): computes a symmetric permutation in sequential analysis. 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto(default)", "None", mumps->id.ICNTL(7), &icntl, &flg)); 2746 if (flg) { 2747 PetscCheck(icntl != 1 && icntl >= 0 && icntl <= 7, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Valid values are 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto"); 2748 mumps->id.ICNTL(7) = icntl; 2749 } 2750 2751 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_8", "ICNTL(8): scaling strategy (-2 to 8 or 77)", "None", mumps->id.ICNTL(8), &mumps->id.ICNTL(8), NULL)); 2752 /* PetscCall(PetscOptionsInt("-mat_mumps_icntl_9","ICNTL(9): computes the solution using A or A^T","None",mumps->id.ICNTL(9),&mumps->id.ICNTL(9),NULL)); handled by MatSolveTranspose_MUMPS() */ 2753 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_10", "ICNTL(10): max num of refinements", "None", mumps->id.ICNTL(10), &mumps->id.ICNTL(10), NULL)); 2754 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_11", "ICNTL(11): statistics related to an error analysis (via -ksp_view)", "None", mumps->id.ICNTL(11), &mumps->id.ICNTL(11), NULL)); 2755 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_12", "ICNTL(12): an ordering strategy for symmetric matrices (0 to 3)", "None", mumps->id.ICNTL(12), &mumps->id.ICNTL(12), NULL)); 2756 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_13", "ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting", "None", mumps->id.ICNTL(13), &mumps->id.ICNTL(13), NULL)); 2757 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_14", "ICNTL(14): percentage increase in the estimated working space", "None", mumps->id.ICNTL(14), &mumps->id.ICNTL(14), NULL)); 2758 PetscCall(MatGetBlockSizes(A, &rbs, &cbs)); 2759 if (rbs == cbs && rbs > 1) mumps->id.ICNTL(15) = (PetscMUMPSInt)-rbs; 2760 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_15", "ICNTL(15): compression of the input matrix resulting from a block format", "None", mumps->id.ICNTL(15), &mumps->id.ICNTL(15), &flg)); 2761 if (flg) { 2762 if (mumps->id.ICNTL(15) < 0) PetscCheck((-mumps->id.ICNTL(15) % cbs == 0) && (-mumps->id.ICNTL(15) % rbs == 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "The opposite of -mat_mumps_icntl_15 must be a multiple of the column and row blocksizes"); 2763 else if (mumps->id.ICNTL(15) > 0) { 2764 const PetscInt *bsizes; 2765 PetscInt nblocks, p, *blkptr = NULL; 2766 PetscMPIInt *recvcounts, *displs, n; 2767 PetscMPIInt rank, size = 0; 2768 2769 PetscCall(MatGetVariableBlockSizes(A, &nblocks, &bsizes)); 2770 flg = PETSC_TRUE; 2771 for (p = 0; p < nblocks; ++p) { 2772 if (bsizes[p] > 1) break; 2773 } 2774 if (p == nblocks) flg = PETSC_FALSE; 2775 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &flg, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A))); 2776 if (flg) { // if at least one process supplies variable block sizes and they are not all set to 1 2777 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2778 if (rank == 0) PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2779 PetscCall(PetscCalloc2(size, &recvcounts, size + 1, &displs)); 2780 PetscCall(PetscMPIIntCast(nblocks, &n)); 2781 PetscCallMPI(MPI_Gather(&n, 1, MPI_INT, recvcounts, 1, MPI_INT, 0, PetscObjectComm((PetscObject)A))); 2782 for (PetscInt p = 0; p < size; ++p) displs[p + 1] = displs[p] + recvcounts[p]; 2783 PetscCall(PetscMalloc1(displs[size] + 1, &blkptr)); 2784 PetscCallMPI(MPI_Bcast(displs + size, 1, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2785 PetscCallMPI(MPI_Gatherv(bsizes, n, MPIU_INT, blkptr + 1, recvcounts, displs, MPIU_INT, 0, PetscObjectComm((PetscObject)A))); 2786 if (rank == 0) { 2787 blkptr[0] = 1; 2788 for (PetscInt p = 0; p < n; ++p) blkptr[p + 1] += blkptr[p]; 2789 PetscCall(MatMumpsSetBlk(F, displs[size], NULL, blkptr)); 2790 } 2791 PetscCall(PetscFree2(recvcounts, displs)); 2792 PetscCall(PetscFree(blkptr)); 2793 } 2794 } 2795 } 2796 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_19", "ICNTL(19): computes the Schur complement", "None", mumps->id.ICNTL(19), &mumps->id.ICNTL(19), NULL)); 2797 if (mumps->id.ICNTL(19) <= 0 || mumps->id.ICNTL(19) > 3) { /* reset any schur data (if any) */ 2798 PetscCall(MatDestroy(&F->schur)); 2799 PetscCall(MatMumpsResetSchur_Private(mumps)); 2800 } 2801 2802 /* Two MPICH Fortran MPI_IN_PLACE binding bugs prevented the use of 'mpich + mumps'. One happened with "mpi4py + mpich + mumps", 2803 and was reported by Firedrake. See https://bitbucket.org/mpi4py/mpi4py/issues/162/mpi4py-initialization-breaks-fortran 2804 and a petsc-maint mailing list thread with subject 'MUMPS segfaults in parallel because of ...' 2805 This bug was fixed by https://github.com/pmodels/mpich/pull/4149. But the fix brought a new bug, 2806 see https://github.com/pmodels/mpich/issues/5589. This bug was fixed by https://github.com/pmodels/mpich/pull/5590. 2807 In short, we could not use distributed RHS until with MPICH v4.0b1 or we enabled a workaround in mumps-5.6.2+ 2808 */ 2809 mumps->ICNTL20 = 10; /* Distributed dense RHS, by default */ 2810 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) || (PetscDefined(HAVE_MPICH) && MPICH_NUMVERSION < 40000101) || PetscDefined(HAVE_MSMPI) 2811 mumps->ICNTL20 = 0; /* Centralized dense RHS, if need be */ 2812 #endif 2813 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_20", "ICNTL(20): give mumps centralized (0) or distributed (10) dense right-hand sides", "None", mumps->ICNTL20, &mumps->ICNTL20, &flg)); 2814 PetscCheck(!flg || mumps->ICNTL20 == 10 || mumps->ICNTL20 == 0, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=%d is not supported by the PETSc/MUMPS interface. Allowed values are 0, 10", (int)mumps->ICNTL20); 2815 #if PETSC_PKG_MUMPS_VERSION_LT(5, 3, 0) 2816 PetscCheck(!flg || mumps->ICNTL20 != 10, PETSC_COMM_SELF, PETSC_ERR_SUP, "ICNTL(20)=10 is not supported before MUMPS-5.3.0"); 2817 #endif 2818 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_21","ICNTL(21): the distribution (centralized or distributed) of the solution vectors","None",mumps->id.ICNTL(21),&mumps->id.ICNTL(21),NULL)); we only use distributed solution vector */ 2819 2820 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_22", "ICNTL(22): in-core/out-of-core factorization and solve (0 or 1)", "None", mumps->id.ICNTL(22), &mumps->id.ICNTL(22), NULL)); 2821 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_23", "ICNTL(23): max size of the working memory (MB) that can allocate per processor", "None", mumps->id.ICNTL(23), &mumps->id.ICNTL(23), NULL)); 2822 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_24", "ICNTL(24): detection of null pivot rows (0 or 1)", "None", mumps->id.ICNTL(24), &mumps->id.ICNTL(24), NULL)); 2823 if (mumps->id.ICNTL(24)) mumps->id.ICNTL(13) = 1; /* turn-off ScaLAPACK to help with the correct detection of null pivots */ 2824 2825 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_25", "ICNTL(25): computes a solution of a deficient matrix and a null space basis", "None", mumps->id.ICNTL(25), &mumps->id.ICNTL(25), NULL)); 2826 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_26", "ICNTL(26): drives the solution phase if a Schur complement matrix", "None", mumps->id.ICNTL(26), &mumps->id.ICNTL(26), NULL)); 2827 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_27", "ICNTL(27): controls the blocking size for multiple right-hand sides", "None", mumps->id.ICNTL(27), &mumps->id.ICNTL(27), NULL)); 2828 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_28", "ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering", "None", mumps->id.ICNTL(28), &mumps->id.ICNTL(28), NULL)); 2829 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_29", "ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis", "None", mumps->id.ICNTL(29), &mumps->id.ICNTL(29), NULL)); 2830 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_30","ICNTL(30): compute user-specified set of entries in inv(A)","None",mumps->id.ICNTL(30),&mumps->id.ICNTL(30),NULL)); */ /* call MatMumpsGetInverse() directly */ 2831 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_31", "ICNTL(31): indicates which factors may be discarded during factorization", "None", mumps->id.ICNTL(31), &mumps->id.ICNTL(31), NULL)); 2832 /* PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_32","ICNTL(32): performs the forward elimination of the right-hand sides during factorization","None",mumps->id.ICNTL(32),&mumps->id.ICNTL(32),NULL)); -- not supported by PETSc API */ 2833 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_33", "ICNTL(33): compute determinant", "None", mumps->id.ICNTL(33), &mumps->id.ICNTL(33), NULL)); 2834 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_35", "ICNTL(35): activates Block Low Rank (BLR) based factorization", "None", mumps->id.ICNTL(35), &mumps->id.ICNTL(35), NULL)); 2835 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_36", "ICNTL(36): choice of BLR factorization variant", "None", mumps->id.ICNTL(36), &mumps->id.ICNTL(36), NULL)); 2836 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_37", "ICNTL(37): compression of the contribution blocks (CB)", "None", mumps->id.ICNTL(37), &mumps->id.ICNTL(37), NULL)); 2837 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_38", "ICNTL(38): estimated compression rate of LU factors with BLR", "None", mumps->id.ICNTL(38), &mumps->id.ICNTL(38), NULL)); 2838 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_48", "ICNTL(48): multithreading with tree parallelism", "None", mumps->id.ICNTL(48), &mumps->id.ICNTL(48), NULL)); 2839 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_49", "ICNTL(49): compact workarray at the end of factorization phase", "None", mumps->id.ICNTL(49), &mumps->id.ICNTL(49), NULL)); 2840 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_56", "ICNTL(56): postponing and rank-revealing factorization", "None", mumps->id.ICNTL(56), &mumps->id.ICNTL(56), NULL)); 2841 PetscCall(PetscOptionsMUMPSInt("-mat_mumps_icntl_58", "ICNTL(58): defines options for symbolic factorization", "None", mumps->id.ICNTL(58), &mumps->id.ICNTL(58), NULL)); 2842 2843 PetscCall(PetscOptionsReal("-mat_mumps_cntl_1", "CNTL(1): relative pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 1), &cntl, &flg)); 2844 if (flg) ID_CNTL_SET(mumps->id, 1, cntl); 2845 PetscCall(PetscOptionsReal("-mat_mumps_cntl_2", "CNTL(2): stopping criterion of refinement", "None", (PetscReal)ID_CNTL_GET(mumps->id, 2), &cntl, &flg)); 2846 if (flg) ID_CNTL_SET(mumps->id, 2, cntl); 2847 PetscCall(PetscOptionsReal("-mat_mumps_cntl_3", "CNTL(3): absolute pivoting threshold", "None", (PetscReal)ID_CNTL_GET(mumps->id, 3), &cntl, &flg)); 2848 if (flg) ID_CNTL_SET(mumps->id, 3, cntl); 2849 PetscCall(PetscOptionsReal("-mat_mumps_cntl_4", "CNTL(4): value for static pivoting", "None", (PetscReal)ID_CNTL_GET(mumps->id, 4), &cntl, &flg)); 2850 if (flg) ID_CNTL_SET(mumps->id, 4, cntl); 2851 PetscCall(PetscOptionsReal("-mat_mumps_cntl_5", "CNTL(5): fixation for null pivots", "None", (PetscReal)ID_CNTL_GET(mumps->id, 5), &cntl, &flg)); 2852 if (flg) ID_CNTL_SET(mumps->id, 5, cntl); 2853 PetscCall(PetscOptionsReal("-mat_mumps_cntl_7", "CNTL(7): dropping parameter used during BLR", "None", (PetscReal)ID_CNTL_GET(mumps->id, 7), &cntl, &flg)); 2854 if (flg) ID_CNTL_SET(mumps->id, 7, cntl); 2855 2856 PetscCall(PetscOptionsString("-mat_mumps_ooc_tmpdir", "out of core directory", "None", mumps->id.ooc_tmpdir, mumps->id.ooc_tmpdir, sizeof(mumps->id.ooc_tmpdir), NULL)); 2857 2858 PetscCall(PetscOptionsIntArray("-mat_mumps_view_info", "request INFO local to each processor", "", info, &ninfo, NULL)); 2859 if (ninfo) { 2860 PetscCheck(ninfo <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "number of INFO %" PetscInt_FMT " must <= 80", ninfo); 2861 PetscCall(PetscMalloc1(ninfo, &mumps->info)); 2862 mumps->ninfo = ninfo; 2863 for (i = 0; i < ninfo; i++) { 2864 PetscCheck(info[i] >= 0 && info[i] <= 80, PETSC_COMM_SELF, PETSC_ERR_USER, "index of INFO %" PetscInt_FMT " must between 1 and 80", ninfo); 2865 mumps->info[i] = info[i]; 2866 } 2867 } 2868 PetscOptionsEnd(); 2869 PetscFunctionReturn(PETSC_SUCCESS); 2870 } 2871 2872 static PetscErrorCode MatFactorSymbolic_MUMPS_ReportIfError(Mat F, Mat A, PETSC_UNUSED const MatFactorInfo *info, Mat_MUMPS *mumps) 2873 { 2874 PetscFunctionBegin; 2875 if (mumps->id.INFOG(1) < 0) { 2876 PetscCheck(!A->erroriffailure, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in analysis: INFOG(1)=%d " MUMPS_MANUALS, mumps->id.INFOG(1)); 2877 if (mumps->id.INFOG(1) == -6) { 2878 PetscCall(PetscInfo(F, "MUMPS error in analysis: matrix is singular, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2879 F->factorerrortype = MAT_FACTOR_STRUCT_ZEROPIVOT; 2880 } else if (mumps->id.INFOG(1) == -5 || mumps->id.INFOG(1) == -7) { 2881 PetscCall(PetscInfo(F, "MUMPS error in analysis: problem with work array, INFOG(1)=%d, INFO(2)=%d\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2882 F->factorerrortype = MAT_FACTOR_OUTMEMORY; 2883 } else { 2884 PetscCall(PetscInfo(F, "MUMPS error in analysis: INFOG(1)=%d, INFO(2)=%d " MUMPS_MANUALS "\n", mumps->id.INFOG(1), mumps->id.INFO(2))); 2885 F->factorerrortype = MAT_FACTOR_OTHER; 2886 } 2887 } 2888 if (!mumps->id.n) F->factorerrortype = MAT_FACTOR_NOERROR; 2889 PetscFunctionReturn(PETSC_SUCCESS); 2890 } 2891 2892 static PetscErrorCode MatLUFactorSymbolic_AIJMUMPS(Mat F, Mat A, IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2893 { 2894 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2895 Vec b; 2896 const PetscInt M = A->rmap->N; 2897 2898 PetscFunctionBegin; 2899 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2900 /* F is assembled by a previous call of MatLUFactorSymbolic_AIJMUMPS() */ 2901 PetscFunctionReturn(PETSC_SUCCESS); 2902 } 2903 2904 /* Set MUMPS options from the options database */ 2905 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2906 2907 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2908 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2909 2910 /* analysis phase */ 2911 mumps->id.job = JOB_FACTSYMBOLIC; 2912 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2913 switch (mumps->id.ICNTL(18)) { 2914 case 0: /* centralized assembled matrix input */ 2915 if (!mumps->myid) { 2916 mumps->id.nnz = mumps->nnz; 2917 mumps->id.irn = mumps->irn; 2918 mumps->id.jcn = mumps->jcn; 2919 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2920 if (r && mumps->id.ICNTL(7) == 7) { 2921 mumps->id.ICNTL(7) = 1; 2922 if (!mumps->myid) { 2923 const PetscInt *idx; 2924 PetscInt i; 2925 2926 PetscCall(PetscMalloc1(M, &mumps->id.perm_in)); 2927 PetscCall(ISGetIndices(r, &idx)); 2928 for (i = 0; i < M; i++) PetscCall(PetscMUMPSIntCast(idx[i] + 1, &mumps->id.perm_in[i])); /* perm_in[]: start from 1, not 0! */ 2929 PetscCall(ISRestoreIndices(r, &idx)); 2930 } 2931 } 2932 } 2933 break; 2934 case 3: /* distributed assembled matrix input (size>1) */ 2935 mumps->id.nnz_loc = mumps->nnz; 2936 mumps->id.irn_loc = mumps->irn; 2937 mumps->id.jcn_loc = mumps->jcn; 2938 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2939 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2940 PetscCall(MatCreateVecs(A, NULL, &b)); 2941 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2942 PetscCall(VecDestroy(&b)); 2943 } 2944 break; 2945 } 2946 PetscMUMPS_c(mumps); 2947 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 2948 2949 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 2950 F->ops->solve = MatSolve_MUMPS; 2951 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 2952 F->ops->matsolve = MatMatSolve_MUMPS; 2953 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 2954 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 2955 2956 mumps->matstruc = SAME_NONZERO_PATTERN; 2957 PetscFunctionReturn(PETSC_SUCCESS); 2958 } 2959 2960 /* Note the PETSc r and c permutations are ignored */ 2961 static PetscErrorCode MatLUFactorSymbolic_BAIJMUMPS(Mat F, Mat A, PETSC_UNUSED IS r, PETSC_UNUSED IS c, const MatFactorInfo *info) 2962 { 2963 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 2964 Vec b; 2965 const PetscInt M = A->rmap->N; 2966 2967 PetscFunctionBegin; 2968 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 2969 /* F is assembled by a previous call of MatLUFactorSymbolic_BAIJMUMPS() */ 2970 PetscFunctionReturn(PETSC_SUCCESS); 2971 } 2972 2973 /* Set MUMPS options from the options database */ 2974 PetscCall(MatSetFromOptions_MUMPS(F, A)); 2975 2976 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 2977 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 2978 2979 /* analysis phase */ 2980 mumps->id.job = JOB_FACTSYMBOLIC; 2981 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 2982 switch (mumps->id.ICNTL(18)) { 2983 case 0: /* centralized assembled matrix input */ 2984 if (!mumps->myid) { 2985 mumps->id.nnz = mumps->nnz; 2986 mumps->id.irn = mumps->irn; 2987 mumps->id.jcn = mumps->jcn; 2988 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 2989 } 2990 break; 2991 case 3: /* distributed assembled matrix input (size>1) */ 2992 mumps->id.nnz_loc = mumps->nnz; 2993 mumps->id.irn_loc = mumps->irn; 2994 mumps->id.jcn_loc = mumps->jcn; 2995 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 2996 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 2997 PetscCall(MatCreateVecs(A, NULL, &b)); 2998 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 2999 PetscCall(VecDestroy(&b)); 3000 } 3001 break; 3002 } 3003 PetscMUMPS_c(mumps); 3004 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 3005 3006 F->ops->lufactornumeric = MatFactorNumeric_MUMPS; 3007 F->ops->solve = MatSolve_MUMPS; 3008 F->ops->solvetranspose = MatSolveTranspose_MUMPS; 3009 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3010 3011 mumps->matstruc = SAME_NONZERO_PATTERN; 3012 PetscFunctionReturn(PETSC_SUCCESS); 3013 } 3014 3015 /* Note the PETSc r permutation and factor info are ignored */ 3016 static PetscErrorCode MatCholeskyFactorSymbolic_MUMPS(Mat F, Mat A, PETSC_UNUSED IS r, const MatFactorInfo *info) 3017 { 3018 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3019 Vec b; 3020 const PetscInt M = A->rmap->N; 3021 3022 PetscFunctionBegin; 3023 if (mumps->matstruc == SAME_NONZERO_PATTERN) { 3024 /* F is assembled by a previous call of MatCholeskyFactorSymbolic_MUMPS() */ 3025 PetscFunctionReturn(PETSC_SUCCESS); 3026 } 3027 3028 /* Set MUMPS options from the options database */ 3029 PetscCall(MatSetFromOptions_MUMPS(F, A)); 3030 3031 PetscCall((*mumps->ConvertToTriples)(A, 1, MAT_INITIAL_MATRIX, mumps)); 3032 PetscCall(MatMumpsGatherNonzerosOnMaster(MAT_INITIAL_MATRIX, mumps)); 3033 3034 /* analysis phase */ 3035 mumps->id.job = JOB_FACTSYMBOLIC; 3036 PetscCall(PetscMUMPSIntCast(M, &mumps->id.n)); 3037 switch (mumps->id.ICNTL(18)) { 3038 case 0: /* centralized assembled matrix input */ 3039 if (!mumps->myid) { 3040 mumps->id.nnz = mumps->nnz; 3041 mumps->id.irn = mumps->irn; 3042 mumps->id.jcn = mumps->jcn; 3043 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_len, &mumps->id.a)); 3044 } 3045 break; 3046 case 3: /* distributed assembled matrix input (size>1) */ 3047 mumps->id.nnz_loc = mumps->nnz; 3048 mumps->id.irn_loc = mumps->irn; 3049 mumps->id.jcn_loc = mumps->jcn; 3050 if (1 < mumps->id.ICNTL(6) && mumps->id.ICNTL(6) < 7) PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, mumps->nnz, mumps->val, mumps->id.precision, &mumps->id.a_loc_len, &mumps->id.a_loc)); 3051 if (mumps->ICNTL20 == 0) { /* Centralized rhs. Create scatter scat_rhs for repeated use in MatSolve() */ 3052 PetscCall(MatCreateVecs(A, NULL, &b)); 3053 PetscCall(VecScatterCreateToZero(b, &mumps->scat_rhs, &mumps->b_seq)); 3054 PetscCall(VecDestroy(&b)); 3055 } 3056 break; 3057 } 3058 PetscMUMPS_c(mumps); 3059 PetscCall(MatFactorSymbolic_MUMPS_ReportIfError(F, A, info, mumps)); 3060 3061 F->ops->choleskyfactornumeric = MatFactorNumeric_MUMPS; 3062 F->ops->solve = MatSolve_MUMPS; 3063 F->ops->solvetranspose = MatSolve_MUMPS; 3064 F->ops->matsolve = MatMatSolve_MUMPS; 3065 F->ops->mattransposesolve = MatMatTransposeSolve_MUMPS; 3066 F->ops->matsolvetranspose = MatMatSolveTranspose_MUMPS; 3067 #if defined(PETSC_USE_COMPLEX) 3068 F->ops->getinertia = NULL; 3069 #else 3070 F->ops->getinertia = MatGetInertia_SBAIJMUMPS; 3071 #endif 3072 3073 mumps->matstruc = SAME_NONZERO_PATTERN; 3074 PetscFunctionReturn(PETSC_SUCCESS); 3075 } 3076 3077 static PetscErrorCode MatView_MUMPS(Mat A, PetscViewer viewer) 3078 { 3079 PetscBool isascii; 3080 PetscViewerFormat format; 3081 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3082 3083 PetscFunctionBegin; 3084 /* check if matrix is mumps type */ 3085 if (A->ops->solve != MatSolve_MUMPS) PetscFunctionReturn(PETSC_SUCCESS); 3086 3087 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 3088 if (isascii) { 3089 PetscCall(PetscViewerGetFormat(viewer, &format)); 3090 if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3091 PetscCall(PetscViewerASCIIPrintf(viewer, "MUMPS run parameters:\n")); 3092 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 3093 PetscCall(PetscViewerASCIIPrintf(viewer, " SYM (matrix type): %d\n", mumps->id.sym)); 3094 PetscCall(PetscViewerASCIIPrintf(viewer, " PAR (host participation): %d\n", mumps->id.par)); 3095 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(1) (output for error): %d\n", mumps->id.ICNTL(1))); 3096 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(2) (output of diagnostic msg): %d\n", mumps->id.ICNTL(2))); 3097 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(3) (output for global info): %d\n", mumps->id.ICNTL(3))); 3098 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(4) (level of printing): %d\n", mumps->id.ICNTL(4))); 3099 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(5) (input mat struct): %d\n", mumps->id.ICNTL(5))); 3100 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(6) (matrix prescaling): %d\n", mumps->id.ICNTL(6))); 3101 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(7) (sequential matrix ordering):%d\n", mumps->id.ICNTL(7))); 3102 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(8) (scaling strategy): %d\n", mumps->id.ICNTL(8))); 3103 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(10) (max num of refinements): %d\n", mumps->id.ICNTL(10))); 3104 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(11) (error analysis): %d\n", mumps->id.ICNTL(11))); 3105 if (mumps->id.ICNTL(11) > 0) { 3106 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(4) (inf norm of input mat): %g\n", (double)ID_RINFOG_GET(mumps->id, 4))); 3107 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(5) (inf norm of solution): %g\n", (double)ID_RINFOG_GET(mumps->id, 5))); 3108 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(6) (inf norm of residual): %g\n", (double)ID_RINFOG_GET(mumps->id, 6))); 3109 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(7),RINFOG(8) (backward error est): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 7), (double)ID_RINFOG_GET(mumps->id, 8))); 3110 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(9) (error estimate): %g\n", (double)ID_RINFOG_GET(mumps->id, 9))); 3111 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(10),RINFOG(11)(condition numbers): %g, %g\n", (double)ID_RINFOG_GET(mumps->id, 10), (double)ID_RINFOG_GET(mumps->id, 11))); 3112 } 3113 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(12) (efficiency control): %d\n", mumps->id.ICNTL(12))); 3114 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(13) (sequential factorization of the root node): %d\n", mumps->id.ICNTL(13))); 3115 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(14) (percentage of estimated workspace increase): %d\n", mumps->id.ICNTL(14))); 3116 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(15) (compression of the input matrix): %d\n", mumps->id.ICNTL(15))); 3117 /* ICNTL(15-17) not used */ 3118 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(18) (input mat struct): %d\n", mumps->id.ICNTL(18))); 3119 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(19) (Schur complement info): %d\n", mumps->id.ICNTL(19))); 3120 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(20) (RHS sparse pattern): %d\n", mumps->id.ICNTL(20))); 3121 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(21) (solution struct): %d\n", mumps->id.ICNTL(21))); 3122 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(22) (in-core/out-of-core facility): %d\n", mumps->id.ICNTL(22))); 3123 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(23) (max size of memory can be allocated locally):%d\n", mumps->id.ICNTL(23))); 3124 3125 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(24) (detection of null pivot rows): %d\n", mumps->id.ICNTL(24))); 3126 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(25) (computation of a null space basis): %d\n", mumps->id.ICNTL(25))); 3127 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(26) (Schur options for RHS or solution): %d\n", mumps->id.ICNTL(26))); 3128 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(27) (blocking size for multiple RHS): %d\n", mumps->id.ICNTL(27))); 3129 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(28) (use parallel or sequential ordering): %d\n", mumps->id.ICNTL(28))); 3130 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(29) (parallel ordering): %d\n", mumps->id.ICNTL(29))); 3131 3132 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(30) (user-specified set of entries in inv(A)): %d\n", mumps->id.ICNTL(30))); 3133 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(31) (factors is discarded in the solve phase): %d\n", mumps->id.ICNTL(31))); 3134 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(33) (compute determinant): %d\n", mumps->id.ICNTL(33))); 3135 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(35) (activate BLR based factorization): %d\n", mumps->id.ICNTL(35))); 3136 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(36) (choice of BLR factorization variant): %d\n", mumps->id.ICNTL(36))); 3137 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(37) (compression of the contribution blocks): %d\n", mumps->id.ICNTL(37))); 3138 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(38) (estimated compression rate of LU factors): %d\n", mumps->id.ICNTL(38))); 3139 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(48) (multithreading with tree parallelism): %d\n", mumps->id.ICNTL(48))); 3140 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(49) (compact workarray at the end of factorization phase):%d\n", mumps->id.ICNTL(49))); 3141 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(56) (postponing and rank-revealing factorization):%d\n", mumps->id.ICNTL(56))); 3142 PetscCall(PetscViewerASCIIPrintf(viewer, " ICNTL(58) (options for symbolic factorization): %d\n", mumps->id.ICNTL(58))); 3143 3144 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(1) (relative pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 1))); 3145 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(2) (stopping criterion of refinement): %g\n", (double)ID_CNTL_GET(mumps->id, 2))); 3146 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(3) (absolute pivoting threshold): %g\n", (double)ID_CNTL_GET(mumps->id, 3))); 3147 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(4) (value of static pivoting): %g\n", (double)ID_CNTL_GET(mumps->id, 4))); 3148 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(5) (fixation for null pivots): %g\n", (double)ID_CNTL_GET(mumps->id, 5))); 3149 PetscCall(PetscViewerASCIIPrintf(viewer, " CNTL(7) (dropping parameter for BLR): %g\n", (double)ID_CNTL_GET(mumps->id, 7))); 3150 3151 /* information local to each processor */ 3152 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(1) (local estimated flops for the elimination after analysis):\n")); 3153 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 3154 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 1))); 3155 PetscCall(PetscViewerFlush(viewer)); 3156 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(2) (local estimated flops for the assembly after factorization):\n")); 3157 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 2))); 3158 PetscCall(PetscViewerFlush(viewer)); 3159 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFO(3) (local estimated flops for the elimination after factorization):\n")); 3160 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %g\n", mumps->myid, (double)ID_RINFO_GET(mumps->id, 3))); 3161 PetscCall(PetscViewerFlush(viewer)); 3162 3163 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(15) (estimated size of (in MB) MUMPS internal data for running numerical factorization):\n")); 3164 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(15))); 3165 PetscCall(PetscViewerFlush(viewer)); 3166 3167 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(16) (size of (in MB) MUMPS internal data used during numerical factorization):\n")); 3168 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(16))); 3169 PetscCall(PetscViewerFlush(viewer)); 3170 3171 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(23) (num of pivots eliminated on this processor after factorization):\n")); 3172 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(23))); 3173 PetscCall(PetscViewerFlush(viewer)); 3174 3175 if (mumps->ninfo && mumps->ninfo <= 80) { 3176 PetscInt i; 3177 for (i = 0; i < mumps->ninfo; i++) { 3178 PetscCall(PetscViewerASCIIPrintf(viewer, " INFO(%" PetscInt_FMT "):\n", mumps->info[i])); 3179 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " [%d] %d\n", mumps->myid, mumps->id.INFO(mumps->info[i]))); 3180 PetscCall(PetscViewerFlush(viewer)); 3181 } 3182 } 3183 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 3184 } else PetscCall(PetscViewerASCIIPrintf(viewer, " Use -%sksp_view ::ascii_info_detail to display information for all processes\n", ((PetscObject)A)->prefix ? ((PetscObject)A)->prefix : "")); 3185 3186 if (mumps->myid == 0) { /* information from the host */ 3187 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(1) (global estimated flops for the elimination after analysis): %g\n", (double)ID_RINFOG_GET(mumps->id, 1))); 3188 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(2) (global estimated flops for the assembly after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 2))); 3189 PetscCall(PetscViewerASCIIPrintf(viewer, " RINFOG(3) (global estimated flops for the elimination after factorization): %g\n", (double)ID_RINFOG_GET(mumps->id, 3))); 3190 PetscCall(PetscViewerASCIIPrintf(viewer, " (RINFOG(12) RINFOG(13))*2^INFOG(34) (determinant): (%g,%g)*(2^%d)\n", (double)ID_RINFOG_GET(mumps->id, 12), (double)ID_RINFOG_GET(mumps->id, 13), mumps->id.INFOG(34))); 3191 3192 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(3) (estimated real workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(3))); 3193 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(4) (estimated integer workspace for factors on all processors after analysis): %d\n", mumps->id.INFOG(4))); 3194 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(5) (estimated maximum front size in the complete tree): %d\n", mumps->id.INFOG(5))); 3195 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(6) (number of nodes in the complete tree): %d\n", mumps->id.INFOG(6))); 3196 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(7) (ordering option effectively used after analysis): %d\n", mumps->id.INFOG(7))); 3197 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(8) (structural symmetry in percent of the permuted matrix after analysis): %d\n", mumps->id.INFOG(8))); 3198 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(9) (total real/complex workspace to store the matrix factors after factorization): %d\n", mumps->id.INFOG(9))); 3199 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(10) (total integer space store the matrix factors after factorization): %d\n", mumps->id.INFOG(10))); 3200 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(11) (order of largest frontal matrix after factorization): %d\n", mumps->id.INFOG(11))); 3201 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(12) (number of off-diagonal pivots): %d\n", mumps->id.INFOG(12))); 3202 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(13) (number of delayed pivots after factorization): %d\n", mumps->id.INFOG(13))); 3203 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(14) (number of memory compress after factorization): %d\n", mumps->id.INFOG(14))); 3204 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(15) (number of steps of iterative refinement after solution): %d\n", mumps->id.INFOG(15))); 3205 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(16) (estimated size (in MB) of all MUMPS internal data for factorization after analysis: value on the most memory consuming processor): %d\n", mumps->id.INFOG(16))); 3206 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(17) (estimated size of all MUMPS internal data for factorization after analysis: sum over all processors): %d\n", mumps->id.INFOG(17))); 3207 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(18) (size of all MUMPS internal data allocated during factorization: value on the most memory consuming processor): %d\n", mumps->id.INFOG(18))); 3208 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(19) (size of all MUMPS internal data allocated during factorization: sum over all processors): %d\n", mumps->id.INFOG(19))); 3209 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(20) (estimated number of entries in the factors): %d\n", mumps->id.INFOG(20))); 3210 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(21) (size in MB of memory effectively used during factorization - value on the most memory consuming processor): %d\n", mumps->id.INFOG(21))); 3211 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(22) (size in MB of memory effectively used during factorization - sum over all processors): %d\n", mumps->id.INFOG(22))); 3212 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(23) (after analysis: value of ICNTL(6) effectively used): %d\n", mumps->id.INFOG(23))); 3213 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(24) (after analysis: value of ICNTL(12) effectively used): %d\n", mumps->id.INFOG(24))); 3214 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(25) (after factorization: number of pivots modified by static pivoting): %d\n", mumps->id.INFOG(25))); 3215 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(28) (after factorization: number of null pivots encountered): %d\n", mumps->id.INFOG(28))); 3216 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(29) (after factorization: effective number of entries in the factors (sum over all processors)): %d\n", mumps->id.INFOG(29))); 3217 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(30, 31) (after solution: size in Mbytes of memory used during solution phase): %d, %d\n", mumps->id.INFOG(30), mumps->id.INFOG(31))); 3218 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(32) (after analysis: type of analysis done): %d\n", mumps->id.INFOG(32))); 3219 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(33) (value used for ICNTL(8)): %d\n", mumps->id.INFOG(33))); 3220 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(34) (exponent of the determinant if determinant is requested): %d\n", mumps->id.INFOG(34))); 3221 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(35) (after factorization: number of entries taking into account BLR factor compression - sum over all processors): %d\n", mumps->id.INFOG(35))); 3222 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(36) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(36))); 3223 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(37) (after analysis: estimated size of all MUMPS internal data for running BLR in-core - sum over all processors): %d\n", mumps->id.INFOG(37))); 3224 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(38) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - value on the most memory consuming processor): %d\n", mumps->id.INFOG(38))); 3225 PetscCall(PetscViewerASCIIPrintf(viewer, " INFOG(39) (after analysis: estimated size of all MUMPS internal data for running BLR out-of-core - sum over all processors): %d\n", mumps->id.INFOG(39))); 3226 } 3227 } 3228 } 3229 PetscFunctionReturn(PETSC_SUCCESS); 3230 } 3231 3232 static PetscErrorCode MatGetInfo_MUMPS(Mat A, PETSC_UNUSED MatInfoType flag, MatInfo *info) 3233 { 3234 Mat_MUMPS *mumps = (Mat_MUMPS *)A->data; 3235 3236 PetscFunctionBegin; 3237 info->block_size = 1.0; 3238 info->nz_allocated = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 3239 info->nz_used = mumps->id.INFOG(20) >= 0 ? mumps->id.INFOG(20) : -1000000 * mumps->id.INFOG(20); 3240 info->nz_unneeded = 0.0; 3241 info->assemblies = 0.0; 3242 info->mallocs = 0.0; 3243 info->memory = 0.0; 3244 info->fill_ratio_given = 0; 3245 info->fill_ratio_needed = 0; 3246 info->factor_mallocs = 0; 3247 PetscFunctionReturn(PETSC_SUCCESS); 3248 } 3249 3250 static PetscErrorCode MatFactorSetSchurIS_MUMPS(Mat F, IS is) 3251 { 3252 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3253 const PetscScalar *arr; 3254 const PetscInt *idxs; 3255 PetscInt size, i; 3256 3257 PetscFunctionBegin; 3258 PetscCall(ISGetLocalSize(is, &size)); 3259 /* Schur complement matrix */ 3260 PetscCall(MatDestroy(&F->schur)); 3261 PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size, size, NULL, &F->schur)); 3262 PetscCall(MatDenseGetArrayRead(F->schur, &arr)); 3263 // don't allocate mumps->id.schur[] now as its precision is yet to know 3264 PetscCall(PetscMUMPSIntCast(size, &mumps->id.size_schur)); 3265 PetscCall(PetscMUMPSIntCast(size, &mumps->id.schur_lld)); 3266 PetscCall(MatDenseRestoreArrayRead(F->schur, &arr)); 3267 if (mumps->sym == 1) PetscCall(MatSetOption(F->schur, MAT_SPD, PETSC_TRUE)); 3268 3269 /* MUMPS expects Fortran style indices */ 3270 PetscCall(PetscFree(mumps->id.listvar_schur)); 3271 PetscCall(PetscMalloc1(size, &mumps->id.listvar_schur)); 3272 PetscCall(ISGetIndices(is, &idxs)); 3273 for (i = 0; i < size; i++) PetscCall(PetscMUMPSIntCast(idxs[i] + 1, &mumps->id.listvar_schur[i])); 3274 PetscCall(ISRestoreIndices(is, &idxs)); 3275 /* set a special value of ICNTL (not handled my MUMPS) to be used in the solve phase by PETSc */ 3276 if (mumps->id.icntl) mumps->id.ICNTL(26) = -1; 3277 else mumps->ICNTL26 = -1; 3278 PetscFunctionReturn(PETSC_SUCCESS); 3279 } 3280 3281 static PetscErrorCode MatFactorCreateSchurComplement_MUMPS(Mat F, Mat *S) 3282 { 3283 Mat St; 3284 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3285 PetscScalar *array; 3286 PetscInt i, j, N = mumps->id.size_schur; 3287 3288 PetscFunctionBegin; 3289 PetscCheck(mumps->id.ICNTL(19), PetscObjectComm((PetscObject)F), PETSC_ERR_ORDER, "Schur complement mode not selected! Call MatFactorSetSchurIS() to enable it"); 3290 PetscCall(MatCreate(PETSC_COMM_SELF, &St)); 3291 PetscCall(MatSetSizes(St, PETSC_DECIDE, PETSC_DECIDE, mumps->id.size_schur, mumps->id.size_schur)); 3292 PetscCall(MatSetType(St, MATDENSE)); 3293 PetscCall(MatSetUp(St)); 3294 PetscCall(MatDenseGetArray(St, &array)); 3295 if (!mumps->sym) { /* MUMPS always return a full matrix */ 3296 if (mumps->id.ICNTL(19) == 1) { /* stored by rows */ 3297 for (i = 0; i < N; i++) { 3298 for (j = 0; j < N; j++) array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3299 } 3300 } else { /* stored by columns */ 3301 PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 3302 } 3303 } else { /* either full or lower-triangular (not packed) */ 3304 if (mumps->id.ICNTL(19) == 2) { /* lower triangular stored by columns */ 3305 for (i = 0; i < N; i++) { 3306 for (j = i; j < N; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3307 } 3308 } else if (mumps->id.ICNTL(19) == 3) { /* full matrix */ 3309 PetscCall(MatMumpsCastMumpsScalarArray(N * N, mumps->id.precision, mumps->id.schur, array)); 3310 } else { /* ICNTL(19) == 1 lower triangular stored by rows */ 3311 for (i = 0; i < N; i++) { 3312 for (j = 0; j < i + 1; j++) array[i * N + j] = array[j * N + i] = ID_FIELD_GET(mumps->id, schur, i * N + j); 3313 } 3314 } 3315 } 3316 PetscCall(MatDenseRestoreArray(St, &array)); 3317 *S = St; 3318 PetscFunctionReturn(PETSC_SUCCESS); 3319 } 3320 3321 static PetscErrorCode MatMumpsSetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt ival) 3322 { 3323 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3324 3325 PetscFunctionBegin; 3326 if (mumps->id.job == JOB_NULL) { /* need to cache icntl and ival since PetscMUMPS_c() has never been called */ 3327 PetscMUMPSInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; /* number of already cached ICNTL */ 3328 for (i = 0; i < nICNTL_pre; ++i) 3329 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) break; /* is this ICNTL already cached? */ 3330 if (i == nICNTL_pre) { /* not already cached */ 3331 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscMUMPSInt) * (2 * nICNTL_pre + 3), &mumps->ICNTL_pre)); 3332 else PetscCall(PetscCalloc(sizeof(PetscMUMPSInt) * 3, &mumps->ICNTL_pre)); 3333 mumps->ICNTL_pre[0]++; 3334 } 3335 mumps->ICNTL_pre[1 + 2 * i] = (PetscMUMPSInt)icntl; 3336 PetscCall(PetscMUMPSIntCast(ival, mumps->ICNTL_pre + 2 + 2 * i)); 3337 } else PetscCall(PetscMUMPSIntCast(ival, &mumps->id.ICNTL(icntl))); 3338 PetscFunctionReturn(PETSC_SUCCESS); 3339 } 3340 3341 static PetscErrorCode MatMumpsGetIcntl_MUMPS(Mat F, PetscInt icntl, PetscInt *ival) 3342 { 3343 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3344 3345 PetscFunctionBegin; 3346 if (mumps->id.job == JOB_NULL) { 3347 PetscInt i, nICNTL_pre = mumps->ICNTL_pre ? mumps->ICNTL_pre[0] : 0; 3348 *ival = 0; 3349 for (i = 0; i < nICNTL_pre; ++i) { 3350 if (mumps->ICNTL_pre[1 + 2 * i] == icntl) *ival = mumps->ICNTL_pre[2 + 2 * i]; 3351 } 3352 } else *ival = mumps->id.ICNTL(icntl); 3353 PetscFunctionReturn(PETSC_SUCCESS); 3354 } 3355 3356 /*@ 3357 MatMumpsSetIcntl - Set MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 3358 3359 Logically Collective 3360 3361 Input Parameters: 3362 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3363 . icntl - index of MUMPS parameter array `ICNTL()` 3364 - ival - value of MUMPS `ICNTL(icntl)` 3365 3366 Options Database Key: 3367 . -mat_mumps_icntl_<icntl> <ival> - change the option numbered `icntl` to `ival` 3368 3369 Level: beginner 3370 3371 Note: 3372 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3373 3374 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3375 @*/ 3376 PetscErrorCode MatMumpsSetIcntl(Mat F, PetscInt icntl, PetscInt ival) 3377 { 3378 PetscFunctionBegin; 3379 PetscValidType(F, 1); 3380 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3381 PetscValidLogicalCollectiveInt(F, icntl, 2); 3382 PetscValidLogicalCollectiveInt(F, ival, 3); 3383 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3384 PetscTryMethod(F, "MatMumpsSetIcntl_C", (Mat, PetscInt, PetscInt), (F, icntl, ival)); 3385 PetscFunctionReturn(PETSC_SUCCESS); 3386 } 3387 3388 /*@ 3389 MatMumpsGetIcntl - Get MUMPS parameter ICNTL() <https://mumps-solver.org/index.php?page=doc> 3390 3391 Logically Collective 3392 3393 Input Parameters: 3394 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3395 - icntl - index of MUMPS parameter array ICNTL() 3396 3397 Output Parameter: 3398 . ival - value of MUMPS ICNTL(icntl) 3399 3400 Level: beginner 3401 3402 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3403 @*/ 3404 PetscErrorCode MatMumpsGetIcntl(Mat F, PetscInt icntl, PetscInt *ival) 3405 { 3406 PetscFunctionBegin; 3407 PetscValidType(F, 1); 3408 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3409 PetscValidLogicalCollectiveInt(F, icntl, 2); 3410 PetscAssertPointer(ival, 3); 3411 PetscCheck((icntl >= 1 && icntl <= 38) || icntl == 48 || icntl == 49 || icntl == 56 || icntl == 58, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported ICNTL value %" PetscInt_FMT, icntl); 3412 PetscUseMethod(F, "MatMumpsGetIcntl_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3413 PetscFunctionReturn(PETSC_SUCCESS); 3414 } 3415 3416 static PetscErrorCode MatMumpsSetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal val) 3417 { 3418 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3419 3420 PetscFunctionBegin; 3421 if (mumps->id.job == JOB_NULL) { 3422 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 3423 for (i = 0; i < nCNTL_pre; ++i) 3424 if (mumps->CNTL_pre[1 + 2 * i] == icntl) break; 3425 if (i == nCNTL_pre) { 3426 if (i > 0) PetscCall(PetscRealloc(sizeof(PetscReal) * (2 * nCNTL_pre + 3), &mumps->CNTL_pre)); 3427 else PetscCall(PetscCalloc(sizeof(PetscReal) * 3, &mumps->CNTL_pre)); 3428 mumps->CNTL_pre[0]++; 3429 } 3430 mumps->CNTL_pre[1 + 2 * i] = icntl; 3431 mumps->CNTL_pre[2 + 2 * i] = val; 3432 } else ID_CNTL_SET(mumps->id, icntl, val); 3433 PetscFunctionReturn(PETSC_SUCCESS); 3434 } 3435 3436 static PetscErrorCode MatMumpsGetCntl_MUMPS(Mat F, PetscInt icntl, PetscReal *val) 3437 { 3438 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3439 3440 PetscFunctionBegin; 3441 if (mumps->id.job == JOB_NULL) { 3442 PetscInt i, nCNTL_pre = mumps->CNTL_pre ? mumps->CNTL_pre[0] : 0; 3443 *val = 0.0; 3444 for (i = 0; i < nCNTL_pre; ++i) { 3445 if (mumps->CNTL_pre[1 + 2 * i] == icntl) *val = mumps->CNTL_pre[2 + 2 * i]; 3446 } 3447 } else *val = ID_CNTL_GET(mumps->id, icntl); 3448 PetscFunctionReturn(PETSC_SUCCESS); 3449 } 3450 3451 /*@ 3452 MatMumpsSetCntl - Set MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3453 3454 Logically Collective 3455 3456 Input Parameters: 3457 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3458 . icntl - index of MUMPS parameter array `CNTL()` 3459 - val - value of MUMPS `CNTL(icntl)` 3460 3461 Options Database Key: 3462 . -mat_mumps_cntl_<icntl> <val> - change the option numbered icntl to ival 3463 3464 Level: beginner 3465 3466 Note: 3467 Ignored if MUMPS is not installed or `F` is not a MUMPS matrix 3468 3469 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3470 @*/ 3471 PetscErrorCode MatMumpsSetCntl(Mat F, PetscInt icntl, PetscReal val) 3472 { 3473 PetscFunctionBegin; 3474 PetscValidType(F, 1); 3475 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3476 PetscValidLogicalCollectiveInt(F, icntl, 2); 3477 PetscValidLogicalCollectiveReal(F, val, 3); 3478 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3479 PetscTryMethod(F, "MatMumpsSetCntl_C", (Mat, PetscInt, PetscReal), (F, icntl, val)); 3480 PetscFunctionReturn(PETSC_SUCCESS); 3481 } 3482 3483 /*@ 3484 MatMumpsGetCntl - Get MUMPS parameter CNTL() <https://mumps-solver.org/index.php?page=doc> 3485 3486 Logically Collective 3487 3488 Input Parameters: 3489 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3490 - icntl - index of MUMPS parameter array CNTL() 3491 3492 Output Parameter: 3493 . val - value of MUMPS CNTL(icntl) 3494 3495 Level: beginner 3496 3497 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3498 @*/ 3499 PetscErrorCode MatMumpsGetCntl(Mat F, PetscInt icntl, PetscReal *val) 3500 { 3501 PetscFunctionBegin; 3502 PetscValidType(F, 1); 3503 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3504 PetscValidLogicalCollectiveInt(F, icntl, 2); 3505 PetscAssertPointer(val, 3); 3506 PetscCheck(icntl >= 1 && icntl <= 7, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONG, "Unsupported CNTL value %" PetscInt_FMT, icntl); 3507 PetscUseMethod(F, "MatMumpsGetCntl_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3508 PetscFunctionReturn(PETSC_SUCCESS); 3509 } 3510 3511 static PetscErrorCode MatMumpsGetInfo_MUMPS(Mat F, PetscInt icntl, PetscInt *info) 3512 { 3513 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3514 3515 PetscFunctionBegin; 3516 *info = mumps->id.INFO(icntl); 3517 PetscFunctionReturn(PETSC_SUCCESS); 3518 } 3519 3520 static PetscErrorCode MatMumpsGetInfog_MUMPS(Mat F, PetscInt icntl, PetscInt *infog) 3521 { 3522 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3523 3524 PetscFunctionBegin; 3525 *infog = mumps->id.INFOG(icntl); 3526 PetscFunctionReturn(PETSC_SUCCESS); 3527 } 3528 3529 static PetscErrorCode MatMumpsGetRinfo_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfo) 3530 { 3531 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3532 3533 PetscFunctionBegin; 3534 *rinfo = ID_RINFO_GET(mumps->id, icntl); 3535 PetscFunctionReturn(PETSC_SUCCESS); 3536 } 3537 3538 static PetscErrorCode MatMumpsGetRinfog_MUMPS(Mat F, PetscInt icntl, PetscReal *rinfog) 3539 { 3540 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3541 3542 PetscFunctionBegin; 3543 *rinfog = ID_RINFOG_GET(mumps->id, icntl); 3544 PetscFunctionReturn(PETSC_SUCCESS); 3545 } 3546 3547 static PetscErrorCode MatMumpsGetNullPivots_MUMPS(Mat F, PetscInt *size, PetscInt **array) 3548 { 3549 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3550 3551 PetscFunctionBegin; 3552 PetscCheck(mumps->id.ICNTL(24) == 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "-mat_mumps_icntl_24 must be set as 1 for null pivot row detection"); 3553 *size = 0; 3554 *array = NULL; 3555 if (!mumps->myid) { 3556 *size = mumps->id.INFOG(28); 3557 PetscCall(PetscMalloc1(*size, array)); 3558 for (int i = 0; i < *size; i++) (*array)[i] = mumps->id.pivnul_list[i] - 1; 3559 } 3560 PetscFunctionReturn(PETSC_SUCCESS); 3561 } 3562 3563 static PetscErrorCode MatMumpsGetInverse_MUMPS(Mat F, Mat spRHS) 3564 { 3565 Mat Bt = NULL, Btseq = NULL; 3566 PetscBool flg; 3567 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3568 PetscScalar *aa; 3569 PetscInt spnr, *ia, *ja, M, nrhs; 3570 3571 PetscFunctionBegin; 3572 PetscAssertPointer(spRHS, 2); 3573 PetscCall(PetscObjectTypeCompare((PetscObject)spRHS, MATTRANSPOSEVIRTUAL, &flg)); 3574 PetscCheck(flg, PetscObjectComm((PetscObject)spRHS), PETSC_ERR_ARG_WRONG, "Matrix spRHS must be type MATTRANSPOSEVIRTUAL matrix"); 3575 PetscCall(MatShellGetScalingShifts(spRHS, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (PetscScalar *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Vec *)MAT_SHELL_NOT_ALLOWED, (Mat *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED, (IS *)MAT_SHELL_NOT_ALLOWED)); 3576 PetscCall(MatTransposeGetMat(spRHS, &Bt)); 3577 3578 PetscCall(MatMumpsSetIcntl(F, 30, 1)); 3579 3580 if (mumps->petsc_size > 1) { 3581 Mat_MPIAIJ *b = (Mat_MPIAIJ *)Bt->data; 3582 Btseq = b->A; 3583 } else { 3584 Btseq = Bt; 3585 } 3586 3587 PetscCall(MatGetSize(spRHS, &M, &nrhs)); 3588 mumps->id.nrhs = (PetscMUMPSInt)nrhs; 3589 PetscCall(PetscMUMPSIntCast(M, &mumps->id.lrhs)); 3590 mumps->id.rhs = NULL; 3591 3592 if (!mumps->myid) { 3593 PetscCall(MatSeqAIJGetArray(Btseq, &aa)); 3594 PetscCall(MatGetRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3595 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3596 PetscCall(PetscMUMPSIntCSRCast(mumps, spnr, ia, ja, &mumps->id.irhs_ptr, &mumps->id.irhs_sparse, &mumps->id.nz_rhs)); 3597 PetscCall(MatMumpsMakeMumpsScalarArray(PETSC_TRUE, ((Mat_SeqAIJ *)Btseq->data)->nz, aa, mumps->id.precision, &mumps->id.rhs_sparse_len, &mumps->id.rhs_sparse)); 3598 } else { 3599 mumps->id.irhs_ptr = NULL; 3600 mumps->id.irhs_sparse = NULL; 3601 mumps->id.nz_rhs = 0; 3602 if (mumps->id.rhs_sparse_len) { 3603 PetscCall(PetscFree(mumps->id.rhs_sparse)); 3604 mumps->id.rhs_sparse_len = 0; 3605 } 3606 } 3607 mumps->id.ICNTL(20) = 1; /* rhs is sparse */ 3608 mumps->id.ICNTL(21) = 0; /* solution is in assembled centralized format */ 3609 3610 /* solve phase */ 3611 mumps->id.job = JOB_SOLVE; 3612 PetscMUMPS_c(mumps); 3613 PetscCheck(mumps->id.INFOG(1) >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "MUMPS error in solve: INFOG(1)=%d INFO(2)=%d " MUMPS_MANUALS, mumps->id.INFOG(1), mumps->id.INFO(2)); 3614 3615 if (!mumps->myid) { 3616 PetscCall(MatSeqAIJRestoreArray(Btseq, &aa)); 3617 PetscCall(MatRestoreRowIJ(Btseq, 1, PETSC_FALSE, PETSC_FALSE, &spnr, (const PetscInt **)&ia, (const PetscInt **)&ja, &flg)); 3618 PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot get IJ structure"); 3619 } 3620 PetscFunctionReturn(PETSC_SUCCESS); 3621 } 3622 3623 /*@ 3624 MatMumpsGetInverse - Get user-specified set of entries in inverse of `A` <https://mumps-solver.org/index.php?page=doc> 3625 3626 Logically Collective 3627 3628 Input Parameter: 3629 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3630 3631 Output Parameter: 3632 . spRHS - sequential sparse matrix in `MATTRANSPOSEVIRTUAL` format with requested entries of inverse of `A` 3633 3634 Level: beginner 3635 3636 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()` 3637 @*/ 3638 PetscErrorCode MatMumpsGetInverse(Mat F, Mat spRHS) 3639 { 3640 PetscFunctionBegin; 3641 PetscValidType(F, 1); 3642 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3643 PetscUseMethod(F, "MatMumpsGetInverse_C", (Mat, Mat), (F, spRHS)); 3644 PetscFunctionReturn(PETSC_SUCCESS); 3645 } 3646 3647 static PetscErrorCode MatMumpsGetInverseTranspose_MUMPS(Mat F, Mat spRHST) 3648 { 3649 Mat spRHS; 3650 3651 PetscFunctionBegin; 3652 PetscCall(MatCreateTranspose(spRHST, &spRHS)); 3653 PetscCall(MatMumpsGetInverse_MUMPS(F, spRHS)); 3654 PetscCall(MatDestroy(&spRHS)); 3655 PetscFunctionReturn(PETSC_SUCCESS); 3656 } 3657 3658 /*@ 3659 MatMumpsGetInverseTranspose - Get user-specified set of entries in inverse of matrix $A^T $ <https://mumps-solver.org/index.php?page=doc> 3660 3661 Logically Collective 3662 3663 Input Parameter: 3664 . F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3665 3666 Output Parameter: 3667 . spRHST - sequential sparse matrix in `MATAIJ` format containing the requested entries of inverse of `A`^T 3668 3669 Level: beginner 3670 3671 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatCreateTranspose()`, `MatMumpsGetInverse()` 3672 @*/ 3673 PetscErrorCode MatMumpsGetInverseTranspose(Mat F, Mat spRHST) 3674 { 3675 PetscBool flg; 3676 3677 PetscFunctionBegin; 3678 PetscValidType(F, 1); 3679 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3680 PetscCall(PetscObjectTypeCompareAny((PetscObject)spRHST, &flg, MATSEQAIJ, MATMPIAIJ, NULL)); 3681 PetscCheck(flg, PetscObjectComm((PetscObject)spRHST), PETSC_ERR_ARG_WRONG, "Matrix spRHST must be MATAIJ matrix"); 3682 PetscUseMethod(F, "MatMumpsGetInverseTranspose_C", (Mat, Mat), (F, spRHST)); 3683 PetscFunctionReturn(PETSC_SUCCESS); 3684 } 3685 3686 static PetscErrorCode MatMumpsSetBlk_MUMPS(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3687 { 3688 Mat_MUMPS *mumps = (Mat_MUMPS *)F->data; 3689 3690 PetscFunctionBegin; 3691 if (nblk) { 3692 PetscAssertPointer(blkptr, 4); 3693 PetscCall(PetscMUMPSIntCast(nblk, &mumps->id.nblk)); 3694 PetscCall(PetscFree(mumps->id.blkptr)); 3695 PetscCall(PetscMalloc1(nblk + 1, &mumps->id.blkptr)); 3696 for (PetscInt i = 0; i < nblk + 1; ++i) PetscCall(PetscMUMPSIntCast(blkptr[i], mumps->id.blkptr + i)); 3697 // mumps->id.icntl[] might have not been allocated, which is done in MatSetFromOptions_MUMPS(). So we don't assign ICNTL(15). 3698 // We use id.nblk and id.blkptr to know what values to set to ICNTL(15) in MatSetFromOptions_MUMPS(). 3699 // mumps->id.ICNTL(15) = 1; 3700 if (blkvar) { 3701 PetscCall(PetscFree(mumps->id.blkvar)); 3702 PetscCall(PetscMalloc1(F->rmap->N, &mumps->id.blkvar)); 3703 for (PetscInt i = 0; i < F->rmap->N; ++i) PetscCall(PetscMUMPSIntCast(blkvar[i], mumps->id.blkvar + i)); 3704 } 3705 } else { 3706 PetscCall(PetscFree(mumps->id.blkptr)); 3707 PetscCall(PetscFree(mumps->id.blkvar)); 3708 // mumps->id.ICNTL(15) = 0; 3709 mumps->id.nblk = 0; 3710 } 3711 PetscFunctionReturn(PETSC_SUCCESS); 3712 } 3713 3714 /*@ 3715 MatMumpsSetBlk - Set user-specified variable block sizes to be used with `-mat_mumps_icntl_15 1` 3716 3717 Not collective, only relevant on the first process of the MPI communicator 3718 3719 Input Parameters: 3720 + F - the factored matrix of A obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3721 . nblk - the number of blocks 3722 . blkvar - see MUMPS documentation, `blkvar(blkptr(iblk):blkptr(iblk+1)-1)`, (`iblk=1, nblk`) holds the variables associated to block `iblk` 3723 - blkptr - array starting at 1 and of size `nblk + 1` storing the prefix sum of all blocks 3724 3725 Level: advanced 3726 3727 .seealso: [](ch_matrices), `MATSOLVERMUMPS`, `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatSetVariableBlockSizes()` 3728 @*/ 3729 PetscErrorCode MatMumpsSetBlk(Mat F, PetscInt nblk, const PetscInt blkvar[], const PetscInt blkptr[]) 3730 { 3731 PetscFunctionBegin; 3732 PetscValidType(F, 1); 3733 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3734 PetscUseMethod(F, "MatMumpsSetBlk_C", (Mat, PetscInt, const PetscInt[], const PetscInt[]), (F, nblk, blkvar, blkptr)); 3735 PetscFunctionReturn(PETSC_SUCCESS); 3736 } 3737 3738 /*@ 3739 MatMumpsGetInfo - Get MUMPS parameter INFO() <https://mumps-solver.org/index.php?page=doc> 3740 3741 Logically Collective 3742 3743 Input Parameters: 3744 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3745 - icntl - index of MUMPS parameter array INFO() 3746 3747 Output Parameter: 3748 . ival - value of MUMPS INFO(icntl) 3749 3750 Level: beginner 3751 3752 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3753 @*/ 3754 PetscErrorCode MatMumpsGetInfo(Mat F, PetscInt icntl, PetscInt *ival) 3755 { 3756 PetscFunctionBegin; 3757 PetscValidType(F, 1); 3758 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3759 PetscAssertPointer(ival, 3); 3760 PetscUseMethod(F, "MatMumpsGetInfo_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3761 PetscFunctionReturn(PETSC_SUCCESS); 3762 } 3763 3764 /*@ 3765 MatMumpsGetInfog - Get MUMPS parameter INFOG() <https://mumps-solver.org/index.php?page=doc> 3766 3767 Logically Collective 3768 3769 Input Parameters: 3770 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3771 - icntl - index of MUMPS parameter array INFOG() 3772 3773 Output Parameter: 3774 . ival - value of MUMPS INFOG(icntl) 3775 3776 Level: beginner 3777 3778 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()` 3779 @*/ 3780 PetscErrorCode MatMumpsGetInfog(Mat F, PetscInt icntl, PetscInt *ival) 3781 { 3782 PetscFunctionBegin; 3783 PetscValidType(F, 1); 3784 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3785 PetscAssertPointer(ival, 3); 3786 PetscUseMethod(F, "MatMumpsGetInfog_C", (Mat, PetscInt, PetscInt *), (F, icntl, ival)); 3787 PetscFunctionReturn(PETSC_SUCCESS); 3788 } 3789 3790 /*@ 3791 MatMumpsGetRinfo - Get MUMPS parameter RINFO() <https://mumps-solver.org/index.php?page=doc> 3792 3793 Logically Collective 3794 3795 Input Parameters: 3796 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3797 - icntl - index of MUMPS parameter array RINFO() 3798 3799 Output Parameter: 3800 . val - value of MUMPS RINFO(icntl) 3801 3802 Level: beginner 3803 3804 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfog()` 3805 @*/ 3806 PetscErrorCode MatMumpsGetRinfo(Mat F, PetscInt icntl, PetscReal *val) 3807 { 3808 PetscFunctionBegin; 3809 PetscValidType(F, 1); 3810 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3811 PetscAssertPointer(val, 3); 3812 PetscUseMethod(F, "MatMumpsGetRinfo_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3813 PetscFunctionReturn(PETSC_SUCCESS); 3814 } 3815 3816 /*@ 3817 MatMumpsGetRinfog - Get MUMPS parameter RINFOG() <https://mumps-solver.org/index.php?page=doc> 3818 3819 Logically Collective 3820 3821 Input Parameters: 3822 + F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3823 - icntl - index of MUMPS parameter array RINFOG() 3824 3825 Output Parameter: 3826 . val - value of MUMPS RINFOG(icntl) 3827 3828 Level: beginner 3829 3830 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3831 @*/ 3832 PetscErrorCode MatMumpsGetRinfog(Mat F, PetscInt icntl, PetscReal *val) 3833 { 3834 PetscFunctionBegin; 3835 PetscValidType(F, 1); 3836 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3837 PetscAssertPointer(val, 3); 3838 PetscUseMethod(F, "MatMumpsGetRinfog_C", (Mat, PetscInt, PetscReal *), (F, icntl, val)); 3839 PetscFunctionReturn(PETSC_SUCCESS); 3840 } 3841 3842 /*@ 3843 MatMumpsGetNullPivots - Get MUMPS parameter PIVNUL_LIST() <https://mumps-solver.org/index.php?page=doc> 3844 3845 Logically Collective 3846 3847 Input Parameter: 3848 . F - the factored matrix obtained by calling `MatGetFactor()` with a `MatSolverType` of `MATSOLVERMUMPS` and a `MatFactorType` of `MAT_FACTOR_LU` or `MAT_FACTOR_CHOLESKY` 3849 3850 Output Parameters: 3851 + size - local size of the array. The size of the array is non-zero only on MPI rank 0 3852 - array - array of rows with null pivot, these rows follow 0-based indexing. The array gets allocated within the function and the user is responsible 3853 for freeing this array. 3854 3855 Level: beginner 3856 3857 .seealso: [](ch_matrices), `Mat`, `MatGetFactor()`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()` 3858 @*/ 3859 PetscErrorCode MatMumpsGetNullPivots(Mat F, PetscInt *size, PetscInt **array) 3860 { 3861 PetscFunctionBegin; 3862 PetscValidType(F, 1); 3863 PetscCheck(F->factortype, PetscObjectComm((PetscObject)F), PETSC_ERR_ARG_WRONGSTATE, "Only for factored matrix"); 3864 PetscAssertPointer(size, 2); 3865 PetscAssertPointer(array, 3); 3866 PetscUseMethod(F, "MatMumpsGetNullPivots_C", (Mat, PetscInt *, PetscInt **), (F, size, array)); 3867 PetscFunctionReturn(PETSC_SUCCESS); 3868 } 3869 3870 /*MC 3871 MATSOLVERMUMPS - A matrix type providing direct solvers (LU and Cholesky) for 3872 MPI distributed and sequential matrices via the external package MUMPS <https://mumps-solver.org/index.php?page=doc> 3873 3874 Works with `MATAIJ` and `MATSBAIJ` matrices 3875 3876 Use ./configure --download-mumps --download-scalapack --download-parmetis --download-metis --download-ptscotch to have PETSc installed with MUMPS 3877 3878 Use ./configure --with-openmp --download-hwloc (or --with-hwloc) to enable running MUMPS in MPI+OpenMP hybrid mode and non-MUMPS in flat-MPI mode. 3879 See details below. 3880 3881 Use `-pc_type cholesky` or `lu` `-pc_factor_mat_solver_type mumps` to use this direct solver 3882 3883 Options Database Keys: 3884 + -mat_mumps_icntl_1 - ICNTL(1): output stream for error messages 3885 . -mat_mumps_icntl_2 - ICNTL(2): output stream for diagnostic printing, statistics, and warning 3886 . -mat_mumps_icntl_3 - ICNTL(3): output stream for global information, collected on the host 3887 . -mat_mumps_icntl_4 - ICNTL(4): level of printing (0 to 4) 3888 . -mat_mumps_icntl_6 - ICNTL(6): permutes to a zero-free diagonal and/or scale the matrix (0 to 7) 3889 . -mat_mumps_icntl_7 - ICNTL(7): computes a symmetric permutation in sequential analysis, 0=AMD, 2=AMF, 3=Scotch, 4=PORD, 5=Metis, 6=QAMD, and 7=auto 3890 Use -pc_factor_mat_ordering_type <type> to have PETSc perform the ordering (sequential only) 3891 . -mat_mumps_icntl_8 - ICNTL(8): scaling strategy (-2 to 8 or 77) 3892 . -mat_mumps_icntl_10 - ICNTL(10): max num of refinements 3893 . -mat_mumps_icntl_11 - ICNTL(11): statistics related to an error analysis (via -ksp_view) 3894 . -mat_mumps_icntl_12 - ICNTL(12): an ordering strategy for symmetric matrices (0 to 3) 3895 . -mat_mumps_icntl_13 - ICNTL(13): parallelism of the root node (enable ScaLAPACK) and its splitting 3896 . -mat_mumps_icntl_14 - ICNTL(14): percentage increase in the estimated working space 3897 . -mat_mumps_icntl_15 - ICNTL(15): compression of the input matrix resulting from a block format 3898 . -mat_mumps_icntl_19 - ICNTL(19): computes the Schur complement 3899 . -mat_mumps_icntl_20 - ICNTL(20): give MUMPS centralized (0) or distributed (10) dense RHS 3900 . -mat_mumps_icntl_22 - ICNTL(22): in-core/out-of-core factorization and solve (0 or 1) 3901 . -mat_mumps_icntl_23 - ICNTL(23): max size of the working memory (MB) that can allocate per processor 3902 . -mat_mumps_icntl_24 - ICNTL(24): detection of null pivot rows (0 or 1) 3903 . -mat_mumps_icntl_25 - ICNTL(25): compute a solution of a deficient matrix and a null space basis 3904 . -mat_mumps_icntl_26 - ICNTL(26): drives the solution phase if a Schur complement matrix 3905 . -mat_mumps_icntl_28 - ICNTL(28): use 1 for sequential analysis and ICNTL(7) ordering, or 2 for parallel analysis and ICNTL(29) ordering 3906 . -mat_mumps_icntl_29 - ICNTL(29): parallel ordering 1 = ptscotch, 2 = parmetis 3907 . -mat_mumps_icntl_30 - ICNTL(30): compute user-specified set of entries in inv(A) 3908 . -mat_mumps_icntl_31 - ICNTL(31): indicates which factors may be discarded during factorization 3909 . -mat_mumps_icntl_33 - ICNTL(33): compute determinant 3910 . -mat_mumps_icntl_35 - ICNTL(35): level of activation of BLR (Block Low-Rank) feature 3911 . -mat_mumps_icntl_36 - ICNTL(36): controls the choice of BLR factorization variant 3912 . -mat_mumps_icntl_37 - ICNTL(37): compression of the contribution blocks (CB) 3913 . -mat_mumps_icntl_38 - ICNTL(38): sets the estimated compression rate of LU factors with BLR 3914 . -mat_mumps_icntl_48 - ICNTL(48): multithreading with tree parallelism 3915 . -mat_mumps_icntl_49 - ICNTL(49): compact workarray at the end of factorization phase 3916 . -mat_mumps_icntl_58 - ICNTL(58): options for symbolic factorization 3917 . -mat_mumps_cntl_1 - CNTL(1): relative pivoting threshold 3918 . -mat_mumps_cntl_2 - CNTL(2): stopping criterion of refinement 3919 . -mat_mumps_cntl_3 - CNTL(3): absolute pivoting threshold 3920 . -mat_mumps_cntl_4 - CNTL(4): value for static pivoting 3921 . -mat_mumps_cntl_5 - CNTL(5): fixation for null pivots 3922 . -mat_mumps_cntl_7 - CNTL(7): precision of the dropping parameter used during BLR factorization 3923 - -mat_mumps_use_omp_threads [m] - run MUMPS in MPI+OpenMP hybrid mode as if omp_set_num_threads(m) is called before calling MUMPS. 3924 Default might be the number of cores per CPU package (socket) as reported by hwloc and suggested by the MUMPS manual. 3925 3926 Level: beginner 3927 3928 Notes: 3929 MUMPS Cholesky does not handle (complex) Hermitian matrices (see User's Guide at <https://mumps-solver.org/index.php?page=doc>) so using it will 3930 error if the matrix is Hermitian. 3931 3932 When used within a `KSP`/`PC` solve the options are prefixed with that of the `PC`. Otherwise one can set the options prefix by calling 3933 `MatSetOptionsPrefixFactor()` on the matrix from which the factor was obtained or `MatSetOptionsPrefix()` on the factor matrix. 3934 3935 When a MUMPS factorization fails inside a KSP solve, for example with a `KSP_DIVERGED_PC_FAILED`, one can find the MUMPS information about 3936 the failure with 3937 .vb 3938 KSPGetPC(ksp,&pc); 3939 PCFactorGetMatrix(pc,&mat); 3940 MatMumpsGetInfo(mat,....); 3941 MatMumpsGetInfog(mat,....); etc. 3942 .ve 3943 Or run with `-ksp_error_if_not_converged` and the program will be stopped and the information printed in the error message. 3944 3945 MUMPS provides 64-bit integer support in two build modes: 3946 full 64-bit: here MUMPS is built with C preprocessing flag -DINTSIZE64 and Fortran compiler option -i8, -fdefault-integer-8 or equivalent, and 3947 requires all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS built the same way with 64-bit integers (for example ILP64 Intel MKL and MPI). 3948 3949 selective 64-bit: with the default MUMPS build, 64-bit integers have been introduced where needed. In compressed sparse row (CSR) storage of matrices, 3950 MUMPS stores column indices in 32-bit, but row offsets in 64-bit, so you can have a huge number of non-zeros, but must have less than 2^31 rows and 3951 columns. This can lead to significant memory and performance gains with respect to a full 64-bit integer MUMPS version. This requires a regular (32-bit 3952 integer) build of all dependent libraries MPI, ScaLAPACK, LAPACK and BLAS. 3953 3954 With --download-mumps=1, PETSc always build MUMPS in selective 64-bit mode, which can be used by both --with-64-bit-indices=0/1 variants of PETSc. 3955 3956 Two modes to run MUMPS/PETSc with OpenMP 3957 .vb 3958 Set `OMP_NUM_THREADS` and run with fewer MPI ranks than cores. For example, if you want to have 16 OpenMP 3959 threads per rank, then you may use "export `OMP_NUM_THREADS` = 16 && mpirun -n 4 ./test". 3960 .ve 3961 3962 .vb 3963 `-mat_mumps_use_omp_threads` [m] and run your code with as many MPI ranks as the number of cores. For example, 3964 if a compute node has 32 cores and you run on two nodes, you may use "mpirun -n 64 ./test -mat_mumps_use_omp_threads 16" 3965 .ve 3966 3967 To run MUMPS in MPI+OpenMP hybrid mode (i.e., enable multithreading in MUMPS), but still run the non-MUMPS part 3968 (i.e., PETSc part) of your code in the so-called flat-MPI (aka pure-MPI) mode, you need to configure PETSc with `--with-openmp` `--download-hwloc` 3969 (or `--with-hwloc`), and have an MPI that supports MPI-3.0's process shared memory (which is usually available). Since MUMPS calls BLAS 3970 libraries, to really get performance, you should have multithreaded BLAS libraries such as Intel MKL, AMD ACML, Cray libSci or OpenBLAS 3971 (PETSc will automatically try to utilized a threaded BLAS if `--with-openmp` is provided). 3972 3973 If you run your code through a job submission system, there are caveats in MPI rank mapping. We use MPI_Comm_split_type() to obtain MPI 3974 processes on each compute node. Listing the processes in rank ascending order, we split processes on a node into consecutive groups of 3975 size m and create a communicator called omp_comm for each group. Rank 0 in an omp_comm is called the master rank, and others in the omp_comm 3976 are called slave ranks (or slaves). Only master ranks are seen to MUMPS and slaves are not. We will free CPUs assigned to slaves (might be set 3977 by CPU binding policies in job scripts) and make the CPUs available to the master so that OMP threads spawned by MUMPS can run on the CPUs. 3978 In a multi-socket compute node, MPI rank mapping is an issue. Still use the above example and suppose your compute node has two sockets, 3979 if you interleave MPI ranks on the two sockets, in other words, even ranks are placed on socket 0, and odd ranks are on socket 1, and bind 3980 MPI ranks to cores, then with `-mat_mumps_use_omp_threads` 16, a master rank (and threads it spawns) will use half cores in socket 0, and half 3981 cores in socket 1, that definitely hurts locality. On the other hand, if you map MPI ranks consecutively on the two sockets, then the 3982 problem will not happen. Therefore, when you use `-mat_mumps_use_omp_threads`, you need to keep an eye on your MPI rank mapping and CPU binding. 3983 For example, with the Slurm job scheduler, one can use srun `--cpu-bind`=verbose -m block:block to map consecutive MPI ranks to sockets and 3984 examine the mapping result. 3985 3986 PETSc does not control thread binding in MUMPS. So to get best performance, one still has to set `OMP_PROC_BIND` and `OMP_PLACES` in job scripts, 3987 for example, export `OMP_PLACES`=threads and export `OMP_PROC_BIND`=spread. One does not need to export `OMP_NUM_THREADS`=m in job scripts as PETSc 3988 calls `omp_set_num_threads`(m) internally before calling MUMPS. 3989 3990 See {cite}`heroux2011bi` and {cite}`gutierrez2017accommodating` 3991 3992 .seealso: [](ch_matrices), `Mat`, `PCFactorSetMatSolverType()`, `MatSolverType`, `MatMumpsSetIcntl()`, `MatMumpsGetIcntl()`, `MatMumpsSetCntl()`, `MatMumpsGetCntl()`, `MatMumpsGetInfo()`, `MatMumpsGetInfog()`, `MatMumpsGetRinfo()`, `MatMumpsGetRinfog()`, `MatMumpsSetBlk()`, `KSPGetPC()`, `PCFactorGetMatrix()` 3993 M*/ 3994 3995 static PetscErrorCode MatFactorGetSolverType_mumps(PETSC_UNUSED Mat A, MatSolverType *type) 3996 { 3997 PetscFunctionBegin; 3998 *type = MATSOLVERMUMPS; 3999 PetscFunctionReturn(PETSC_SUCCESS); 4000 } 4001 4002 /* MatGetFactor for Seq and MPI AIJ matrices */ 4003 static PetscErrorCode MatGetFactor_aij_mumps(Mat A, MatFactorType ftype, Mat *F) 4004 { 4005 Mat B; 4006 Mat_MUMPS *mumps; 4007 PetscBool isSeqAIJ, isDiag, isDense; 4008 PetscMPIInt size; 4009 4010 PetscFunctionBegin; 4011 #if defined(PETSC_USE_COMPLEX) 4012 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4013 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4014 *F = NULL; 4015 PetscFunctionReturn(PETSC_SUCCESS); 4016 } 4017 #endif 4018 /* Create the factorization matrix */ 4019 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isSeqAIJ)); 4020 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATDIAGONAL, &isDiag)); 4021 PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4022 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4023 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4024 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4025 PetscCall(MatSetUp(B)); 4026 4027 PetscCall(PetscNew(&mumps)); 4028 4029 B->ops->view = MatView_MUMPS; 4030 B->ops->getinfo = MatGetInfo_MUMPS; 4031 4032 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4033 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4034 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4035 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4036 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4037 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4038 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4039 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4040 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4041 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4042 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4043 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4044 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4045 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4046 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4047 4048 if (ftype == MAT_FACTOR_LU) { 4049 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4050 B->factortype = MAT_FACTOR_LU; 4051 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqaij; 4052 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4053 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4054 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpiaij; 4055 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4056 mumps->sym = 0; 4057 } else { 4058 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4059 B->factortype = MAT_FACTOR_CHOLESKY; 4060 if (isSeqAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqaij_seqsbaij; 4061 else if (isDiag) mumps->ConvertToTriples = MatConvertToTriples_diagonal_xaij; 4062 else if (isDense) mumps->ConvertToTriples = MatConvertToTriples_dense_xaij; 4063 else mumps->ConvertToTriples = MatConvertToTriples_mpiaij_mpisbaij; 4064 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4065 #if defined(PETSC_USE_COMPLEX) 4066 mumps->sym = 2; 4067 #else 4068 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4069 else mumps->sym = 2; 4070 #endif 4071 } 4072 4073 /* set solvertype */ 4074 PetscCall(PetscFree(B->solvertype)); 4075 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4076 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4077 if (size == 1) { 4078 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4079 B->canuseordering = PETSC_TRUE; 4080 } 4081 B->ops->destroy = MatDestroy_MUMPS; 4082 B->data = (void *)mumps; 4083 4084 *F = B; 4085 mumps->id.job = JOB_NULL; 4086 mumps->ICNTL_pre = NULL; 4087 mumps->CNTL_pre = NULL; 4088 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4089 PetscFunctionReturn(PETSC_SUCCESS); 4090 } 4091 4092 /* MatGetFactor for Seq and MPI SBAIJ matrices */ 4093 static PetscErrorCode MatGetFactor_sbaij_mumps(Mat A, PETSC_UNUSED MatFactorType ftype, Mat *F) 4094 { 4095 Mat B; 4096 Mat_MUMPS *mumps; 4097 PetscBool isSeqSBAIJ; 4098 PetscMPIInt size; 4099 4100 PetscFunctionBegin; 4101 #if defined(PETSC_USE_COMPLEX) 4102 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4103 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4104 *F = NULL; 4105 PetscFunctionReturn(PETSC_SUCCESS); 4106 } 4107 #endif 4108 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4109 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4110 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4111 PetscCall(MatSetUp(B)); 4112 4113 PetscCall(PetscNew(&mumps)); 4114 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSBAIJ, &isSeqSBAIJ)); 4115 if (isSeqSBAIJ) { 4116 mumps->ConvertToTriples = MatConvertToTriples_seqsbaij_seqsbaij; 4117 } else { 4118 mumps->ConvertToTriples = MatConvertToTriples_mpisbaij_mpisbaij; 4119 } 4120 4121 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4122 B->ops->view = MatView_MUMPS; 4123 B->ops->getinfo = MatGetInfo_MUMPS; 4124 4125 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4126 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4127 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4128 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4129 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4130 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4131 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4132 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4133 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4134 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4135 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4136 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4137 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4138 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4139 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4140 4141 B->factortype = MAT_FACTOR_CHOLESKY; 4142 #if defined(PETSC_USE_COMPLEX) 4143 mumps->sym = 2; 4144 #else 4145 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4146 else mumps->sym = 2; 4147 #endif 4148 4149 /* set solvertype */ 4150 PetscCall(PetscFree(B->solvertype)); 4151 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4152 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4153 if (size == 1) { 4154 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4155 B->canuseordering = PETSC_TRUE; 4156 } 4157 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_CHOLESKY])); 4158 B->ops->destroy = MatDestroy_MUMPS; 4159 B->data = (void *)mumps; 4160 4161 *F = B; 4162 mumps->id.job = JOB_NULL; 4163 mumps->ICNTL_pre = NULL; 4164 mumps->CNTL_pre = NULL; 4165 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4166 PetscFunctionReturn(PETSC_SUCCESS); 4167 } 4168 4169 static PetscErrorCode MatGetFactor_baij_mumps(Mat A, MatFactorType ftype, Mat *F) 4170 { 4171 Mat B; 4172 Mat_MUMPS *mumps; 4173 PetscBool isSeqBAIJ; 4174 PetscMPIInt size; 4175 4176 PetscFunctionBegin; 4177 /* Create the factorization matrix */ 4178 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQBAIJ, &isSeqBAIJ)); 4179 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4180 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4181 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4182 PetscCall(MatSetUp(B)); 4183 4184 PetscCall(PetscNew(&mumps)); 4185 PetscCheck(ftype == MAT_FACTOR_LU, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot use PETSc BAIJ matrices with MUMPS Cholesky, use SBAIJ or AIJ matrix instead"); 4186 B->ops->lufactorsymbolic = MatLUFactorSymbolic_BAIJMUMPS; 4187 B->factortype = MAT_FACTOR_LU; 4188 if (isSeqBAIJ) mumps->ConvertToTriples = MatConvertToTriples_seqbaij_seqaij; 4189 else mumps->ConvertToTriples = MatConvertToTriples_mpibaij_mpiaij; 4190 mumps->sym = 0; 4191 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4192 4193 B->ops->view = MatView_MUMPS; 4194 B->ops->getinfo = MatGetInfo_MUMPS; 4195 4196 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4197 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4198 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4199 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4200 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4201 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4202 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4203 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4204 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4205 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4206 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4207 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4208 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4209 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4210 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4211 4212 /* set solvertype */ 4213 PetscCall(PetscFree(B->solvertype)); 4214 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4215 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4216 if (size == 1) { 4217 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4218 B->canuseordering = PETSC_TRUE; 4219 } 4220 B->ops->destroy = MatDestroy_MUMPS; 4221 B->data = (void *)mumps; 4222 4223 *F = B; 4224 mumps->id.job = JOB_NULL; 4225 mumps->ICNTL_pre = NULL; 4226 mumps->CNTL_pre = NULL; 4227 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4228 PetscFunctionReturn(PETSC_SUCCESS); 4229 } 4230 4231 /* MatGetFactor for Seq and MPI SELL matrices */ 4232 static PetscErrorCode MatGetFactor_sell_mumps(Mat A, MatFactorType ftype, Mat *F) 4233 { 4234 Mat B; 4235 Mat_MUMPS *mumps; 4236 PetscBool isSeqSELL; 4237 PetscMPIInt size; 4238 4239 PetscFunctionBegin; 4240 /* Create the factorization matrix */ 4241 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQSELL, &isSeqSELL)); 4242 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4243 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4244 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4245 PetscCall(MatSetUp(B)); 4246 4247 PetscCall(PetscNew(&mumps)); 4248 4249 B->ops->view = MatView_MUMPS; 4250 B->ops->getinfo = MatGetInfo_MUMPS; 4251 4252 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4253 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4254 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4255 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4256 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4257 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4258 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4259 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4260 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4261 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4262 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4263 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4264 4265 PetscCheck(ftype == MAT_FACTOR_LU, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4266 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4267 B->factortype = MAT_FACTOR_LU; 4268 PetscCheck(isSeqSELL, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "To be implemented"); 4269 mumps->ConvertToTriples = MatConvertToTriples_seqsell_seqaij; 4270 mumps->sym = 0; 4271 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[MAT_FACTOR_LU])); 4272 4273 /* set solvertype */ 4274 PetscCall(PetscFree(B->solvertype)); 4275 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4276 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4277 if (size == 1) { 4278 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4279 B->canuseordering = PETSC_TRUE; 4280 } 4281 B->ops->destroy = MatDestroy_MUMPS; 4282 B->data = (void *)mumps; 4283 4284 *F = B; 4285 mumps->id.job = JOB_NULL; 4286 mumps->ICNTL_pre = NULL; 4287 mumps->CNTL_pre = NULL; 4288 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4289 PetscFunctionReturn(PETSC_SUCCESS); 4290 } 4291 4292 /* MatGetFactor for MATNEST matrices */ 4293 static PetscErrorCode MatGetFactor_nest_mumps(Mat A, MatFactorType ftype, Mat *F) 4294 { 4295 Mat B, **mats; 4296 Mat_MUMPS *mumps; 4297 PetscInt nr, nc; 4298 PetscMPIInt size; 4299 PetscBool flg = PETSC_TRUE; 4300 4301 PetscFunctionBegin; 4302 #if defined(PETSC_USE_COMPLEX) 4303 if (ftype == MAT_FACTOR_CHOLESKY && A->hermitian == PETSC_BOOL3_TRUE && A->symmetric != PETSC_BOOL3_TRUE) { 4304 PetscCall(PetscInfo(A, "Hermitian MAT_FACTOR_CHOLESKY is not supported. Use MAT_FACTOR_LU instead.\n")); 4305 *F = NULL; 4306 PetscFunctionReturn(PETSC_SUCCESS); 4307 } 4308 #endif 4309 4310 /* Return if some condition is not satisfied */ 4311 *F = NULL; 4312 PetscCall(MatNestGetSubMats(A, &nr, &nc, &mats)); 4313 if (ftype == MAT_FACTOR_CHOLESKY) { 4314 IS *rows, *cols; 4315 PetscInt *m, *M; 4316 4317 PetscCheck(nr == nc, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MAT_FACTOR_CHOLESKY not supported for nest sizes %" PetscInt_FMT " != %" PetscInt_FMT ". Use MAT_FACTOR_LU.", nr, nc); 4318 PetscCall(PetscMalloc2(nr, &rows, nc, &cols)); 4319 PetscCall(MatNestGetISs(A, rows, cols)); 4320 for (PetscInt r = 0; flg && r < nr; r++) PetscCall(ISEqualUnsorted(rows[r], cols[r], &flg)); 4321 if (!flg) { 4322 PetscCall(PetscFree2(rows, cols)); 4323 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for unequal row and column maps. Use MAT_FACTOR_LU.\n")); 4324 PetscFunctionReturn(PETSC_SUCCESS); 4325 } 4326 PetscCall(PetscMalloc2(nr, &m, nr, &M)); 4327 for (PetscInt r = 0; r < nr; r++) PetscCall(ISGetMinMax(rows[r], &m[r], &M[r])); 4328 for (PetscInt r = 0; flg && r < nr; r++) 4329 for (PetscInt k = r + 1; flg && k < nr; k++) 4330 if ((m[k] <= m[r] && m[r] <= M[k]) || (m[k] <= M[r] && M[r] <= M[k])) flg = PETSC_FALSE; 4331 PetscCall(PetscFree2(m, M)); 4332 PetscCall(PetscFree2(rows, cols)); 4333 if (!flg) { 4334 PetscCall(PetscInfo(A, "MAT_FACTOR_CHOLESKY not supported for intersecting row maps. Use MAT_FACTOR_LU.\n")); 4335 PetscFunctionReturn(PETSC_SUCCESS); 4336 } 4337 } 4338 4339 for (PetscInt r = 0; r < nr; r++) { 4340 for (PetscInt c = 0; c < nc; c++) { 4341 Mat sub = mats[r][c]; 4342 PetscBool isSeqAIJ, isMPIAIJ, isSeqBAIJ, isMPIBAIJ, isSeqSBAIJ, isMPISBAIJ, isDiag, isDense; 4343 4344 if (!sub || (ftype == MAT_FACTOR_CHOLESKY && c < r)) continue; 4345 PetscCall(MatGetTranspose_TransposeVirtual(&sub, NULL, NULL, NULL, NULL)); 4346 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQAIJ, &isSeqAIJ)); 4347 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIAIJ, &isMPIAIJ)); 4348 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQBAIJ, &isSeqBAIJ)); 4349 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPIBAIJ, &isMPIBAIJ)); 4350 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATSEQSBAIJ, &isSeqSBAIJ)); 4351 PetscCall(PetscObjectBaseTypeCompare((PetscObject)sub, MATMPISBAIJ, &isMPISBAIJ)); 4352 PetscCall(PetscObjectTypeCompare((PetscObject)sub, MATDIAGONAL, &isDiag)); 4353 PetscCall(PetscObjectTypeCompareAny((PetscObject)sub, &isDense, MATSEQDENSE, MATMPIDENSE, NULL)); 4354 if (ftype == MAT_FACTOR_CHOLESKY) { 4355 if (r == c) { 4356 if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isSeqSBAIJ && !isMPISBAIJ && !isDiag && !isDense) { 4357 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4358 flg = PETSC_FALSE; 4359 } 4360 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4361 PetscCall(PetscInfo(sub, "MAT_FACTOR_CHOLESKY not supported for off-diagonal block of type %s.\n", ((PetscObject)sub)->type_name)); 4362 flg = PETSC_FALSE; 4363 } 4364 } else if (!isSeqAIJ && !isMPIAIJ && !isSeqBAIJ && !isMPIBAIJ && !isDiag && !isDense) { 4365 PetscCall(PetscInfo(sub, "MAT_FACTOR_LU not supported for block of type %s.\n", ((PetscObject)sub)->type_name)); 4366 flg = PETSC_FALSE; 4367 } 4368 } 4369 } 4370 if (!flg) PetscFunctionReturn(PETSC_SUCCESS); 4371 4372 /* Create the factorization matrix */ 4373 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 4374 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 4375 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &((PetscObject)B)->type_name)); 4376 PetscCall(MatSetUp(B)); 4377 4378 PetscCall(PetscNew(&mumps)); 4379 4380 B->ops->view = MatView_MUMPS; 4381 B->ops->getinfo = MatGetInfo_MUMPS; 4382 4383 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorGetSolverType_C", MatFactorGetSolverType_mumps)); 4384 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorSetSchurIS_C", MatFactorSetSchurIS_MUMPS)); 4385 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatFactorCreateSchurComplement_C", MatFactorCreateSchurComplement_MUMPS)); 4386 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetIcntl_C", MatMumpsSetIcntl_MUMPS)); 4387 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetIcntl_C", MatMumpsGetIcntl_MUMPS)); 4388 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetCntl_C", MatMumpsSetCntl_MUMPS)); 4389 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetCntl_C", MatMumpsGetCntl_MUMPS)); 4390 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfo_C", MatMumpsGetInfo_MUMPS)); 4391 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInfog_C", MatMumpsGetInfog_MUMPS)); 4392 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfo_C", MatMumpsGetRinfo_MUMPS)); 4393 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetRinfog_C", MatMumpsGetRinfog_MUMPS)); 4394 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetNullPivots_C", MatMumpsGetNullPivots_MUMPS)); 4395 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverse_C", MatMumpsGetInverse_MUMPS)); 4396 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsGetInverseTranspose_C", MatMumpsGetInverseTranspose_MUMPS)); 4397 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMumpsSetBlk_C", MatMumpsSetBlk_MUMPS)); 4398 4399 if (ftype == MAT_FACTOR_LU) { 4400 B->ops->lufactorsymbolic = MatLUFactorSymbolic_AIJMUMPS; 4401 B->factortype = MAT_FACTOR_LU; 4402 mumps->sym = 0; 4403 } else { 4404 B->ops->choleskyfactorsymbolic = MatCholeskyFactorSymbolic_MUMPS; 4405 B->factortype = MAT_FACTOR_CHOLESKY; 4406 #if defined(PETSC_USE_COMPLEX) 4407 mumps->sym = 2; 4408 #else 4409 if (A->spd == PETSC_BOOL3_TRUE) mumps->sym = 1; 4410 else mumps->sym = 2; 4411 #endif 4412 } 4413 mumps->ConvertToTriples = MatConvertToTriples_nest_xaij; 4414 PetscCall(PetscStrallocpy(MATORDERINGEXTERNAL, (char **)&B->preferredordering[ftype])); 4415 4416 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 4417 if (size == 1) { 4418 /* MUMPS option -mat_mumps_icntl_7 1 is automatically set if PETSc ordering is passed into symbolic factorization */ 4419 B->canuseordering = PETSC_TRUE; 4420 } 4421 4422 /* set solvertype */ 4423 PetscCall(PetscFree(B->solvertype)); 4424 PetscCall(PetscStrallocpy(MATSOLVERMUMPS, &B->solvertype)); 4425 B->ops->destroy = MatDestroy_MUMPS; 4426 B->data = (void *)mumps; 4427 4428 *F = B; 4429 mumps->id.job = JOB_NULL; 4430 mumps->ICNTL_pre = NULL; 4431 mumps->CNTL_pre = NULL; 4432 mumps->matstruc = DIFFERENT_NONZERO_PATTERN; 4433 PetscFunctionReturn(PETSC_SUCCESS); 4434 } 4435 4436 PETSC_INTERN PetscErrorCode MatSolverTypeRegister_MUMPS(void) 4437 { 4438 PetscFunctionBegin; 4439 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4440 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4441 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4442 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4443 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPISBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4444 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4445 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4446 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_LU, MatGetFactor_baij_mumps)); 4447 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_baij_mumps)); 4448 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSBAIJ, MAT_FACTOR_CHOLESKY, MatGetFactor_sbaij_mumps)); 4449 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQSELL, MAT_FACTOR_LU, MatGetFactor_sell_mumps)); 4450 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4451 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATDIAGONAL, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4452 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4453 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATSEQDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4454 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_LU, MatGetFactor_aij_mumps)); 4455 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATMPIDENSE, MAT_FACTOR_CHOLESKY, MatGetFactor_aij_mumps)); 4456 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_LU, MatGetFactor_nest_mumps)); 4457 PetscCall(MatSolverTypeRegister(MATSOLVERMUMPS, MATNEST, MAT_FACTOR_CHOLESKY, MatGetFactor_nest_mumps)); 4458 PetscFunctionReturn(PETSC_SUCCESS); 4459 } 4460